#include "tnl/t_vp_build.h"
#include "tnl/t_context.h"
#include "tnl/t_vertex.h"
-#include "tnl/t_pipeline.h"
+#include "vbo/vbo_context.h"
-#include "radeon_mipmap_tree.h"
#include "r600_context.h"
#include "r600_cmdbuf.h"
#include "r700_fragprog.h"
#include "r700_state.h"
+#include "radeon_buffer_objects.h"
+#include "radeon_common_context.h"
+
void r700WaitForIdle(context_t *context);
void r700WaitForIdleClean(context_t *context);
-void r700Start3D(context_t *context);
-GLboolean r700SendTextureState(context_t *context);
-unsigned int r700PrimitiveType(int prim);
-void r600UpdateTextureState(GLcontext * ctx);
+static unsigned int r700PrimitiveType(int prim);
GLboolean r700SyncSurf(context_t *context,
struct radeon_bo *pbo,
uint32_t read_domain,
void r700WaitForIdle(context_t *context)
{
BATCH_LOCALS(&context->radeon);
+ radeon_print(RADEON_RENDER | RADEON_STATE, RADEON_TRACE, "%s\n", __func__);
BEGIN_BATCH_NO_AUTOSTATE(3);
R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
void r700WaitForIdleClean(context_t *context)
{
BATCH_LOCALS(&context->radeon);
+ radeon_print(RADEON_RENDER | RADEON_STATE, RADEON_TRACE, "%s\n", __func__);
BEGIN_BATCH_NO_AUTOSTATE(5);
R600_OUT_BATCH(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
void r700Start3D(context_t *context)
{
BATCH_LOCALS(&context->radeon);
+ radeon_print(RADEON_RENDER | RADEON_STATE, RADEON_TRACE, "%s\n", __func__);
if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
{
BEGIN_BATCH_NO_AUTOSTATE(2);
END_BATCH();
COMMIT_BATCH();
-
- r700WaitForIdleClean(context);
-}
-
-static GLboolean r700SetupShaders(GLcontext * ctx)
-{
- context_t *context = R700_CONTEXT(ctx);
-
- R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
-
- GLuint exportCount;
-
- r700->ps.SQ_PGM_RESOURCES_PS.u32All = 0;
- r700->vs.SQ_PGM_RESOURCES_VS.u32All = 0;
-
- SETbit(r700->ps.SQ_PGM_RESOURCES_PS.u32All, PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit);
- SETbit(r700->vs.SQ_PGM_RESOURCES_VS.u32All, PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit);
-
- r700SetupVertexProgram(ctx);
-
- r700SetupFragmentProgram(ctx);
-
- exportCount = (r700->ps.SQ_PGM_EXPORTS_PS.u32All & EXPORT_MODE_mask) / (1 << EXPORT_MODE_shift);
- r700->CB_SHADER_CONTROL.u32All = (1 << exportCount) - 1;
-
- return GL_TRUE;
-}
-
-GLboolean r700SendTextureState(context_t *context)
-{
- unsigned int i;
- R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
- offset_modifiers offset_mod = {NO_SHIFT, 0, 0xFFFFFFFF};
- struct radeon_bo *bo = NULL;
- BATCH_LOCALS(&context->radeon);
-
- for (i=0; i<R700_TEXTURE_NUMBERUNITS; i++) {
- radeonTexObj *t = r700->textures[i];
- if (t) {
- if (!t->image_override)
- bo = t->mt->bo;
- else
- bo = t->bo;
- if (bo) {
-
- r700SyncSurf(context, bo,
- RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM,
- 0, TC_ACTION_ENA_bit);
-
- BEGIN_BATCH_NO_AUTOSTATE(9);
- R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
- R600_OUT_BATCH(i * 7);
- R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE0);
- R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE1);
- R600_OUT_BATCH_RELOC(r700->textures[i]->SQ_TEX_RESOURCE2,
- bo,
- 0,
- RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0, &offset_mod);
- R600_OUT_BATCH_RELOC(r700->textures[i]->SQ_TEX_RESOURCE3,
- bo,
- 0,
- RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0, &offset_mod);
- R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE4);
- R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE5);
- R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE6);
- END_BATCH();
-
- BEGIN_BATCH_NO_AUTOSTATE(5);
- R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_SAMPLER, 3));
- R600_OUT_BATCH(i * 3);
- R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER0);
- R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER1);
- R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER2);
- END_BATCH();
- COMMIT_BATCH();
- }
- }
- }
- return GL_TRUE;
}
GLboolean r700SyncSurf(context_t *context,
uint32_t sync_type)
{
BATCH_LOCALS(&context->radeon);
+ radeon_print(RADEON_RENDER | RADEON_STATE, RADEON_TRACE, "%s\n", __func__);
uint32_t cp_coher_size;
- offset_modifiers offset_mod;
+
+ if (!pbo)
+ return GL_FALSE;
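+    /* CP_COHER_SIZE counts 256-byte blocks; 0xffffffff syncs everything */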
if (pbo->size == 0xffffffff)
cp_coher_size = 0xffffffff;
else
cp_coher_size = ((pbo->size + 255) >> 8);
- offset_mod.shift = NO_SHIFT;
- offset_mod.shiftbits = 0;
- offset_mod.mask = 0xFFFFFFFF;
-
- BEGIN_BATCH_NO_AUTOSTATE(5);
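+    /* 5 dwords of SURFACE_SYNC payload plus 2 for the relocation */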
+ BEGIN_BATCH_NO_AUTOSTATE(5 + 2);
R600_OUT_BATCH(CP_PACKET3(R600_IT_SURFACE_SYNC, 3));
R600_OUT_BATCH(sync_type);
R600_OUT_BATCH(cp_coher_size);
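+    /* CP_COHER_BASE (patched by the relocation below) and poll interval */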
+ R600_OUT_BATCH(0);
+ R600_OUT_BATCH(10);
R600_OUT_BATCH_RELOC(0,
pbo,
0,
- read_domain, write_domain, 0, &offset_mod); // ???
- R600_OUT_BATCH(10);
-
+ read_domain, write_domain, 0);
END_BATCH();
COMMIT_BATCH();
return GL_TRUE;
}
-unsigned int r700PrimitiveType(int prim)
+static unsigned int r700PrimitiveType(int prim)
{
switch (prim & PRIM_MODE_MASK)
{
}
}
-static GLboolean r700RunRender(GLcontext * ctx,
- struct tnl_pipeline_stage *stage)
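+/* Trim the vertex count to a whole number of primitives; returns the
+ * usable count, or -1 for an unknown primitive mode.
+ */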
+static int r700NumVerts(int num_verts, int prim)
+{
+ int verts_off = 0;
+
+ switch (prim & PRIM_MODE_MASK) {
+ case GL_POINTS:
+ verts_off = 0;
+ break;
+ case GL_LINES:
+ verts_off = num_verts % 2;
+ break;
+ case GL_LINE_STRIP:
+ if (num_verts < 2)
+ verts_off = num_verts;
+ break;
+ case GL_LINE_LOOP:
+ if (num_verts < 2)
+ verts_off = num_verts;
+ break;
+ case GL_TRIANGLES:
+ verts_off = num_verts % 3;
+ break;
+ case GL_TRIANGLE_STRIP:
+ if (num_verts < 3)
+ verts_off = num_verts;
+ break;
+ case GL_TRIANGLE_FAN:
+ if (num_verts < 3)
+ verts_off = num_verts;
+ break;
+ case GL_QUADS:
+ verts_off = num_verts % 4;
+ break;
+ case GL_QUAD_STRIP:
+ if (num_verts < 4)
+ verts_off = num_verts;
+ else
+ verts_off = num_verts % 2;
+ break;
+ case GL_POLYGON:
+ if (num_verts < 3)
+ verts_off = num_verts;
+ break;
+ default:
+ assert(0);
+ return -1;
+ }
+
+ return num_verts - verts_off;
+}
+
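+/* Emit one indexed draw (DRAW_INDEX); the indices were uploaded into
+ * context->ind_buf beforehand by r700SetupIndexBuffer().
+ */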
+static void r700RunRenderPrimitive(struct gl_context * ctx, int start, int end,
+ int prim, GLint basevertex)
{
context_t *context = R700_CONTEXT(ctx);
- R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
- int lastIndex = 0;
BATCH_LOCALS(&context->radeon);
+ int type, total_emit;
+ int num_indices;
+ uint32_t vgt_draw_initiator = 0;
+ uint32_t vgt_index_type = 0;
+ uint32_t vgt_primitive_type = 0;
+ uint32_t vgt_num_indices = 0;
- unsigned int i, j;
- TNLcontext *tnl = TNL_CONTEXT(ctx);
- struct vertex_buffer *vb = &tnl->vb;
+ type = r700PrimitiveType(prim);
+ num_indices = r700NumVerts(end - start, prim);
- r700Start3D(context); /* TODO : this is too much. */
+ radeon_print(RADEON_RENDER, RADEON_TRACE,
+ "%s type %x num_indices %d\n",
+ __func__, type, num_indices);
- r700SendSQConfig(context);
+ if (type < 0 || num_indices <= 0)
+ return;
- r700UpdateShaders(ctx);
+ SETfield(vgt_primitive_type, type,
+ VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask);
- r700SetScissor(context);
- r700SetRenderTarget(context, 0);
- r700SetDepthTarget(context);
+ SETfield(vgt_index_type, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
- if(r700SetupStreams(ctx))
+ if(GL_TRUE != context->ind_buf.is_32bit)
+ {
+ SETfield(vgt_index_type, DI_INDEX_SIZE_16_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
+ }
+
+ /* 16-bit indexes are packed in a 32-bit value */
+ SETfield(vgt_index_type,
+#if MESA_BIG_ENDIAN
+ VGT_DMA_SWAP_32_BIT,
+#else
+ VGT_DMA_SWAP_NONE,
+#endif
+ SWAP_MODE_shift, SWAP_MODE_mask);
+
+
+ vgt_num_indices = num_indices;
+ SETfield(vgt_draw_initiator, DI_SRC_SEL_DMA, SOURCE_SELECT_shift, SOURCE_SELECT_mask);
+ SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask);
+
+ total_emit = 3 /* VGT_PRIMITIVE_TYPE */
+ + 2 /* VGT_INDEX_TYPE */
+ + 2 /* NUM_INSTANCES */
+ + 4 /* VTX_BASE_VTX_LOC + VTX_START_INST_LOC */
+ + 5 + 2; /* DRAW_INDEX */
+
+ BEGIN_BATCH_NO_AUTOSTATE(total_emit);
+ // prim
+ R600_OUT_BATCH_REGSEQ(VGT_PRIMITIVE_TYPE, 1);
+ R600_OUT_BATCH(vgt_primitive_type);
+ // index type
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
+ R600_OUT_BATCH(vgt_index_type);
+ // num instances
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
+ R600_OUT_BATCH(1);
+ /* offset */
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 2));
+ R600_OUT_BATCH(mmSQ_VTX_BASE_VTX_LOC - ASIC_CTL_CONST_BASE_INDEX);
+ R600_OUT_BATCH(basevertex); //VTX_BASE_VTX_LOC
+ R600_OUT_BATCH(0); //VTX_START_INST_LOC
+ // draw packet
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX, 3));
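+	// index buffer base address; the lo dword is patched by the reloc below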
+ R600_OUT_BATCH(context->ind_buf.bo_offset);
+ R600_OUT_BATCH(0);
+ R600_OUT_BATCH(vgt_num_indices);
+ R600_OUT_BATCH(vgt_draw_initiator);
+ R600_OUT_BATCH_RELOC(context->ind_buf.bo_offset,
+ context->ind_buf.bo,
+ context->ind_buf.bo_offset,
+ RADEON_GEM_DOMAIN_GTT, 0, 0);
+ END_BATCH();
+ COMMIT_BATCH();
+}
+
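+/* Emit a non-indexed draw (DRAW_INDEX_AUTO) for vertices [start, end). */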
+static void r700RunRenderPrimitiveImmediate(struct gl_context * ctx, int start, int end, int prim)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ BATCH_LOCALS(&context->radeon);
+ int type;
+    int num_indices;
+    uint32_t total_emit = 0;
+ uint32_t vgt_draw_initiator = 0;
+ uint32_t vgt_index_type = 0;
+ uint32_t vgt_primitive_type = 0;
+ uint32_t vgt_num_indices = 0;
+
+ type = r700PrimitiveType(prim);
+ num_indices = r700NumVerts(end - start, prim);
+
+ radeon_print(RADEON_RENDER, RADEON_TRACE,
+ "%s type %x num_indices %d\n",
+ __func__, type, num_indices);
+
+ if (type < 0 || num_indices <= 0)
+ return;
+
+ SETfield(vgt_primitive_type, type,
+ VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask);
+
+ if (num_indices > 0xffff)
+ {
+ SETfield(vgt_index_type, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
+ }
+ else
{
- return GL_TRUE;
+ SETfield(vgt_index_type, DI_INDEX_SIZE_16_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
}
- r600UpdateTextureState(ctx);
- r700SendTextureState(context);
+ /* 16-bit indexes are packed in a 32-bit value */
+ SETfield(vgt_index_type,
+#if MESA_BIG_ENDIAN
+ VGT_DMA_SWAP_32_BIT,
+#else
+ VGT_DMA_SWAP_NONE,
+#endif
+ SWAP_MODE_shift, SWAP_MODE_mask);
+
+ vgt_num_indices = num_indices;
+ SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask);
+
+ SETfield(vgt_draw_initiator, DI_SRC_SEL_AUTO_INDEX, SOURCE_SELECT_shift, SOURCE_SELECT_mask);
+
+ total_emit += 3 /* VGT_PRIMITIVE_TYPE */
+ + 2 /* VGT_INDEX_TYPE */
+ + 2 /* NUM_INSTANCES */
+ + 4 /* VTX_BASE_VTX_LOC + VTX_START_INST_LOC */
+ + 3; /* DRAW */
+
+ BEGIN_BATCH_NO_AUTOSTATE(total_emit);
+ // prim
+ R600_OUT_BATCH_REGSEQ(VGT_PRIMITIVE_TYPE, 1);
+ R600_OUT_BATCH(vgt_primitive_type);
+ // index type
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
+ R600_OUT_BATCH(vgt_index_type);
+ // num instances
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
+ R600_OUT_BATCH(1);
+ /* offset */
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 2));
+ R600_OUT_BATCH(mmSQ_VTX_BASE_VTX_LOC - ASIC_CTL_CONST_BASE_INDEX);
+ R600_OUT_BATCH(start); //VTX_BASE_VTX_LOC
+ R600_OUT_BATCH(0); //VTX_START_INST_LOC
+ // draw packet
+
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1));
+ R600_OUT_BATCH(vgt_num_indices);
+ R600_OUT_BATCH(vgt_draw_initiator);
+
+ END_BATCH();
+ COMMIT_BATCH();
+}
+
+/* start 3d, idle, cb/db flush */
+#define PRE_EMIT_STATE_BUFSZ (5 + 5 + 14)
+
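+/* Upper-bound the number of dwords this draw will emit and reserve
+ * command buffer space, flushing first if needed.
+ */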
+static GLuint r700PredictRenderSize(struct gl_context* ctx,
+ const struct _mesa_prim *prim,
+ const struct _mesa_index_buffer *ib,
+ GLuint nr_prims)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ GLboolean flushed;
+    GLuint dwords;
+ GLuint state_size;
+
+ dwords = PRE_EMIT_STATE_BUFSZ;
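+    /* per-draw cost must match what r700RunRenderPrimitive{,Immediate} emit */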
+ if (ib)
+ dwords += nr_prims * 18;
+    else
+	dwords += nr_prims * 14;
- r700SetupShaders(ctx);
+ state_size = radeonCountStateEmitSize(&context->radeon);
+ flushed = rcommonEnsureCmdBufSpace(&context->radeon,
+ dwords + state_size,
+ __FUNCTION__);
+ if (flushed)
+ dwords += radeonCountStateEmitSize(&context->radeon);
+ else
+ dwords += state_size;
- r700SendFSState(context); // FIXME just a place holder for now
- r700SendPSState(context);
- r700SendVSState(context);
+ radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s: total prediction size is %d.\n", __FUNCTION__, dwords);
+ return dwords;
- r700SendUCPState(context);
- r700SendContextStates(context);
- r700SendViewportState(context, 0);
- r700SendRenderTargetState(context, 0);
- r700SendDepthTargetState(context);
+}
- /* richard test code */
- for (i = 0; i < vb->PrimitiveCount; i++)
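+/* Expand 'count' elements of 'sz' components from src_ptr (advancing by
+ * 'stride' bytes) into tightly packed GLfloats at dst_ptr, converting
+ * through MACRO when the array is flagged Normalized.
+ */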
+#define CONVERT( TYPE, MACRO ) do { \
+ GLuint i, j, sz; \
+ sz = input->Size; \
+ if (input->Normalized) { \
+ for (i = 0; i < count; i++) { \
+ const TYPE *in = (TYPE *)src_ptr; \
+ for (j = 0; j < sz; j++) { \
+ *dst_ptr++ = MACRO(*in); \
+ in++; \
+ } \
+ src_ptr += stride; \
+ } \
+ } else { \
+ for (i = 0; i < count; i++) { \
+ const TYPE *in = (TYPE *)src_ptr; \
+ for (j = 0; j < sz; j++) { \
+ *dst_ptr++ = (GLfloat)(*in); \
+ in++; \
+ } \
+ src_ptr += stride; \
+ } \
+ } \
+} while (0)
+
+/**
+ * Convert attribute data of any GL type to tightly packed floats.
+ * If the attribute lives in a named buffer object, the data is read from
+ * it and the stream descriptor is pointed at a newly allocated DMA bo.
+ */
+static void r700ConvertAttrib(struct gl_context *ctx, int count,
+ const struct gl_client_array *input,
+ struct StreamDesc *attr)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ const GLvoid *src_ptr;
+ GLboolean mapped_named_bo = GL_FALSE;
+ GLfloat *dst_ptr;
+ GLuint stride;
+
+ stride = (input->StrideB == 0) ? getTypeSize(input->Type) * input->Size : input->StrideB;
+
+ /* Convert value for first element only */
+ if (input->StrideB == 0)
+ {
+ count = 1;
+ }
+
+ if (input->BufferObj->Name)
{
- GLuint prim = _tnl_translate_prim(&vb->Primitive[i]);
- GLuint start = vb->Primitive[i].start;
- GLuint end = vb->Primitive[i].start + vb->Primitive[i].count;
- GLuint numIndices = vb->Primitive[i].count;
- GLuint numEntires;
-
- unsigned int VGT_DRAW_INITIATOR = 0;
- unsigned int VGT_INDEX_TYPE = 0;
- unsigned int VGT_PRIMITIVE_TYPE = 0;
- unsigned int VGT_NUM_INDICES = 0;
-
- if (numIndices < 1)
- continue;
-
- numEntires = 3 /* VGT_PRIMITIVE_TYPE */
- + 2 /* VGT_INDEX_TYPE */
- + 2 /* NUM_INSTANCES */
- + numIndices + 3; /* DRAW_INDEX_IMMD */
-
- BEGIN_BATCH_NO_AUTOSTATE(numEntires);
-
- // prim
- VGT_PRIMITIVE_TYPE |= r700PrimitiveType(prim) << VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift;
- R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
- R600_OUT_BATCH(mmVGT_PRIMITIVE_TYPE - ASIC_CONFIG_BASE_INDEX);
- R600_OUT_BATCH(VGT_PRIMITIVE_TYPE);
-
- // index type
- VGT_INDEX_TYPE |= DI_INDEX_SIZE_32_BIT << INDEX_TYPE_shift;
- R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
- R600_OUT_BATCH(VGT_INDEX_TYPE);
-
- // num instances
- R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
- R600_OUT_BATCH(1);
-
- // draw packet
- VGT_NUM_INDICES = numIndices;
- VGT_DRAW_INITIATOR |= DI_SRC_SEL_IMMEDIATE << SOURCE_SELECT_shift;
- VGT_DRAW_INITIATOR |= DI_MAJOR_MODE_0 << MAJOR_MODE_shift;
-
- R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (numIndices + 1)));
- R600_OUT_BATCH(VGT_NUM_INDICES);
- R600_OUT_BATCH(VGT_DRAW_INITIATOR);
-
- for (j = lastIndex; j < lastIndex + numIndices; j++)
+ if (!input->BufferObj->Pointer)
{
- R600_OUT_BATCH(j);
+ ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
+ mapped_named_bo = GL_TRUE;
}
- lastIndex += numIndices;
- END_BATCH();
- COMMIT_BATCH();
+ src_ptr = ADD_POINTERS(input->BufferObj->Pointer, input->Ptr);
+ }
+ else
+ {
+ src_ptr = input->Ptr;
}
- /* Flush render op cached for last several quads. */
- r700WaitForIdleClean(context);
+ radeonAllocDmaRegion(&context->radeon, &attr->bo, &attr->bo_offset,
+ sizeof(GLfloat) * input->Size * count, 32);
+
+ radeon_bo_map(attr->bo, 1);
+
+ dst_ptr = (GLfloat *)ADD_POINTERS(attr->bo->ptr, attr->bo_offset);
- radeonReleaseArrays(ctx, 0);
+ assert(src_ptr != NULL);
- rcommonFlushCmdBuf( &context->radeon, __FUNCTION__ );
+ switch (input->Type)
+ {
+ case GL_DOUBLE:
+ CONVERT(GLdouble, (GLfloat));
+ break;
+ case GL_UNSIGNED_INT:
+ CONVERT(GLuint, UINT_TO_FLOAT);
+ break;
+ case GL_INT:
+ CONVERT(GLint, INT_TO_FLOAT);
+ break;
+ case GL_UNSIGNED_SHORT:
+ CONVERT(GLushort, USHORT_TO_FLOAT);
+ break;
+ case GL_SHORT:
+ CONVERT(GLshort, SHORT_TO_FLOAT);
+ break;
+ case GL_UNSIGNED_BYTE:
+ assert(input->Format != GL_BGRA);
+ CONVERT(GLubyte, UBYTE_TO_FLOAT);
+ break;
+ case GL_BYTE:
+ CONVERT(GLbyte, BYTE_TO_FLOAT);
+ break;
+ default:
+ assert(0);
+ break;
+ }
- return GL_FALSE;
+ radeon_bo_unmap(attr->bo);
+
+ if (mapped_named_bo)
+ {
+ ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
+ }
}
-static GLboolean r700RunNonTCLRender(GLcontext * ctx,
- struct tnl_pipeline_stage *stage) /* -------------------- */
+/* Copy a named-bo attribute whose stride or offset is not dword aligned
+ * into a DMA bo with a dword-aligned stride.
+ */
+static void r700AlignDataToDword(struct gl_context *ctx,
+ const struct gl_client_array *input,
+ int count,
+ struct StreamDesc *attr)
{
- GLboolean bRet = GL_TRUE;
-
- return bRet;
+ context_t *context = R700_CONTEXT(ctx);
+ const int dst_stride = (input->StrideB + 3) & ~3;
+ const int size = getTypeSize(input->Type) * input->Size * count;
+ GLboolean mapped_named_bo = GL_FALSE;
+
+ radeonAllocDmaRegion(&context->radeon, &attr->bo, &attr->bo_offset, size, 32);
+
+ radeon_bo_map(attr->bo, 1);
+
+ if (!input->BufferObj->Pointer)
+ {
+ ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
+ mapped_named_bo = GL_TRUE;
+ }
+
+ {
+ GLvoid *src_ptr = ADD_POINTERS(input->BufferObj->Pointer, input->Ptr);
+ GLvoid *dst_ptr = ADD_POINTERS(attr->bo->ptr, attr->bo_offset);
+ int i;
+
+ for (i = 0; i < count; ++i)
+ {
+ memcpy(dst_ptr, src_ptr, input->StrideB);
+ src_ptr += input->StrideB;
+ dst_ptr += dst_stride;
+ }
+ }
+
+ radeon_bo_unmap(attr->bo);
+ if (mapped_named_bo)
+ {
+ ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
+ }
+
+ attr->stride = dst_stride;
}
-static GLboolean r700RunTCLRender(GLcontext * ctx, /*----------------------*/
- struct tnl_pipeline_stage *stage)
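+/* Upload (or directly reference) each active vertex attribute and point
+ * the radeon AOS state at the resulting buffer objects.
+ */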
+static void r700SetupStreams(struct gl_context *ctx, const struct gl_client_array *input[], int count)
{
- GLboolean bRet = GL_FALSE;
+ context_t *context = R700_CONTEXT(ctx);
+ GLuint stride;
+ int ret;
+ int i, index;
- /* TODO : sw fallback */
+ R600_STATECHANGE(context, vtx);
- /**
- * Ensure all enabled and complete textures are uploaded along with any buffers being used.
- */
- if(!r600ValidateBuffers(ctx))
+ for(index = 0; index < context->nNumActiveAos; index++)
{
- return GL_TRUE;
+ struct radeon_aos *aos = &context->radeon.tcl.aos[index];
+ i = context->stream_desc[index].element;
+
+ stride = (input[i]->StrideB == 0) ? getTypeSize(input[i]->Type) * input[i]->Size : input[i]->StrideB;
+
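+		/* the fetcher works on dword-sized floats; convert types it
+		 * cannot consume directly (any non-dword type on big-endian) */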
+ if (input[i]->Type == GL_DOUBLE || input[i]->Type == GL_UNSIGNED_INT || input[i]->Type == GL_INT
+#if MESA_BIG_ENDIAN
+ || getTypeSize(input[i]->Type) != 4
+#endif
+ )
+ {
+ assert(count);
+ r700ConvertAttrib(ctx, count, input[i], &context->stream_desc[index]);
+ }
+ else
+ {
+			if (input[i]->BufferObj->Name)
+			{
+				if (stride % 4 != 0 || (intptr_t)input[i]->Ptr % 4 != 0)
+				{
+					/* the vertex fetcher requires dword alignment */
+					r700AlignDataToDword(ctx, input[i], count, &context->stream_desc[index]);
+					context->stream_desc[index].is_named_bo = GL_FALSE;
+				}
+				else
+				{
+					context->stream_desc[index].stride = input[i]->StrideB;
+					context->stream_desc[index].bo_offset = (intptr_t) input[i]->Ptr;
+					context->stream_desc[index].bo = get_radeon_buffer_object(input[i]->BufferObj)->bo;
+					context->stream_desc[index].is_named_bo = GL_TRUE;
+				}
+			}
+ else
+ {
+ int size;
+ int local_count = count;
+ uint32_t *dst;
+
+ if (input[i]->StrideB == 0)
+ {
+ size = getTypeSize(input[i]->Type) * input[i]->Size;
+ local_count = 1;
+ }
+ else
+ {
+ size = getTypeSize(input[i]->Type) * input[i]->Size * local_count;
+ }
+
+ radeonAllocDmaRegion(&context->radeon, &context->stream_desc[index].bo,
+ &context->stream_desc[index].bo_offset, size, 32);
+
+ radeon_bo_map(context->stream_desc[index].bo, 1);
+ assert(context->stream_desc[index].bo->ptr != NULL);
+
+
+ dst = (uint32_t *)ADD_POINTERS(context->stream_desc[index].bo->ptr,
+ context->stream_desc[index].bo_offset);
+
+ switch (context->stream_desc[index].dwords)
+ {
+ case 1:
+ radeonEmitVec4(dst, input[i]->Ptr, input[i]->StrideB, local_count);
+ break;
+ case 2:
+ radeonEmitVec8(dst, input[i]->Ptr, input[i]->StrideB, local_count);
+ break;
+ case 3:
+ radeonEmitVec12(dst, input[i]->Ptr, input[i]->StrideB, local_count);
+ break;
+ case 4:
+ radeonEmitVec16(dst, input[i]->Ptr, input[i]->StrideB, local_count);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+ radeon_bo_unmap(context->stream_desc[index].bo);
+ }
+ }
+
+ aos->count = context->stream_desc[index].stride == 0 ? 1 : count;
+ aos->stride = context->stream_desc[index].stride / sizeof(float);
+ aos->components = context->stream_desc[index].dwords;
+ aos->bo = context->stream_desc[index].bo;
+ aos->offset = context->stream_desc[index].bo_offset;
+
+ if(context->stream_desc[index].is_named_bo)
+ {
+ radeon_cs_space_add_persistent_bo(context->radeon.cmdbuf.cs,
+ context->stream_desc[index].bo,
+ RADEON_GEM_DOMAIN_GTT, 0);
+ }
+ }
+
+ ret = radeon_cs_space_check_with_bo(context->radeon.cmdbuf.cs,
+ first_elem(&context->radeon.dma.reserved)->bo,
+ RADEON_GEM_DOMAIN_GTT, 0);
+}
+
+static void r700FreeData(struct gl_context *ctx)
+{
+ /* Need to zero tcl.aos[n].bo and tcl.elt_dma_bo
+ * to prevent double unref in radeonReleaseArrays
+ * called during context destroy
+ */
+ context_t *context = R700_CONTEXT(ctx);
+
+ int i;
+
+ for (i = 0; i < context->nNumActiveAos; i++)
+ {
+ if (!context->stream_desc[i].is_named_bo)
+ {
+ radeon_bo_unref(context->stream_desc[i].bo);
+ }
+ context->radeon.tcl.aos[i].bo = NULL;
+ }
+
+ if (context->ind_buf.bo != NULL)
+ {
+ radeon_bo_unref(context->ind_buf.bo);
+ }
+}
+
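+/* Repack 8-bit (and, on big-endian, 16-bit) indices as pairs of 16-bit
+ * values in each dword, which is what the index fetcher expects.
+ */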
+static void r700FixupIndexBuffer(struct gl_context *ctx, const struct _mesa_index_buffer *mesa_ind_buf)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ GLvoid *src_ptr;
+ GLuint *out;
+ int i;
+ GLboolean mapped_named_bo = GL_FALSE;
+
+ if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer)
+ {
+ ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+ mapped_named_bo = GL_TRUE;
+ assert(mesa_ind_buf->obj->Pointer != NULL);
+ }
+ src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr);
+
+ if (mesa_ind_buf->type == GL_UNSIGNED_BYTE)
+ {
+ GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1);
+ GLubyte *in = (GLubyte *)src_ptr;
+
+ radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo,
+ &context->ind_buf.bo_offset, size, 4);
+
+ radeon_bo_map(context->ind_buf.bo, 1);
+ assert(context->ind_buf.bo->ptr != NULL);
+ out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset);
+
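+	/* widen each byte index to 16 bits and pack two per dword */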
+ for (i = 0; i + 1 < mesa_ind_buf->count; i += 2)
+ {
+ *out++ = in[i] | in[i + 1] << 16;
+ }
+
+ if (i < mesa_ind_buf->count)
+ {
+ *out++ = in[i];
+ }
+
+ radeon_bo_unmap(context->ind_buf.bo);
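+	/* on big-endian hosts GL_UNSIGNED_SHORT indices are routed here
+	 * too and get the same pairwise repacking */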
+#if MESA_BIG_ENDIAN
}
+ else
+ { /* if (mesa_ind_buf->type == GL_UNSIGNED_SHORT) */
+ GLushort *in = (GLushort *)src_ptr;
+ GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1);
+
+ radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo,
+ &context->ind_buf.bo_offset, size, 4);
+ radeon_bo_map(context->ind_buf.bo, 1);
+ assert(context->ind_buf.bo->ptr != NULL);
+ out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset);
+
+ for (i = 0; i + 1 < mesa_ind_buf->count; i += 2)
+ {
+ *out++ = in[i] | in[i + 1] << 16;
+ }
+
+ if (i < mesa_ind_buf->count)
+ {
+ *out++ = in[i];
+ }
+ radeon_bo_unmap(context->ind_buf.bo);
+#endif
+ }
+
+ context->ind_buf.is_32bit = GL_FALSE;
+ context->ind_buf.count = mesa_ind_buf->count;
+
+ if (mapped_named_bo)
+ {
+ ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
+ }
+}
+
+static void r700SetupIndexBuffer(struct gl_context *ctx, const struct _mesa_index_buffer *mesa_ind_buf)
+{
context_t *context = R700_CONTEXT(ctx);
+ if (!mesa_ind_buf) {
+ context->ind_buf.bo = NULL;
+ return;
+ }
+
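+    /* these index types can be copied into the DMA bo unchanged */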
+#if MESA_BIG_ENDIAN
+ if (mesa_ind_buf->type == GL_UNSIGNED_INT)
+#else
+ if (mesa_ind_buf->type != GL_UNSIGNED_BYTE)
+#endif
+ {
+ const GLvoid *src_ptr;
+ GLvoid *dst_ptr;
+ GLboolean mapped_named_bo = GL_FALSE;
+
+ if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer)
+ {
+ ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+ assert(mesa_ind_buf->obj->Pointer != NULL);
+ mapped_named_bo = GL_TRUE;
+ }
+
+ src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr);
+
+ const GLuint size = mesa_ind_buf->count * getTypeSize(mesa_ind_buf->type);
+
+ radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo,
+ &context->ind_buf.bo_offset, size, 4);
+ radeon_bo_map(context->ind_buf.bo, 1);
+ assert(context->ind_buf.bo->ptr != NULL);
+ dst_ptr = ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset);
+
+ memcpy(dst_ptr, src_ptr, size);
+
+ radeon_bo_unmap(context->ind_buf.bo);
+ context->ind_buf.is_32bit = (mesa_ind_buf->type == GL_UNSIGNED_INT);
+ context->ind_buf.count = mesa_ind_buf->count;
+
+ if (mapped_named_bo)
+ {
+ ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
+ }
+ }
+ else
+ {
+ r700FixupIndexBuffer(ctx, mesa_ind_buf);
+ }
+}
+
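+/* Conditions the hardware path cannot handle; the caller falls back to TNL */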
+static GLboolean check_fallbacks(struct gl_context *ctx)
+{
+ if (ctx->RenderMode != GL_RENDER)
+ return GL_TRUE;
+
+ return GL_FALSE;
+}
+
+static GLboolean r700TryDrawPrims(struct gl_context *ctx,
+ const struct gl_client_array *arrays[],
+ const struct _mesa_prim *prim,
+ GLuint nr_prims,
+ const struct _mesa_index_buffer *ib,
+ GLuint min_index,
+ GLuint max_index )
+{
+ context_t *context = R700_CONTEXT(ctx);
+ radeonContextPtr radeon = &context->radeon;
+ GLuint i, id = 0;
+ struct radeon_renderbuffer *rrb;
+
+ if (ctx->NewState)
+ _mesa_update_state( ctx );
+
+ if (check_fallbacks(ctx))
+ return GL_FALSE;
+
+ _tnl_UpdateFixedFunctionProgram(ctx);
+ r700SetVertexFormat(ctx, arrays, max_index + 1);
+ /* shaders need to be updated before buffers are validated */
r700UpdateShaders(ctx);
+ if (!r600ValidateBuffers(ctx))
+ return GL_FALSE;
- bRet = r700RunRender(ctx, stage);
+    /* always emit CB base to prevent
+     * lockups on some chips.
+     */
+ R600_STATECHANGE(context, cb_target);
+ /* mark vtx as dirty since it changes per-draw */
+ R600_STATECHANGE(context, vtx);
- return bRet;
- //GL_FALSE will stop to do other pipe stage in _tnl_run_pipeline
- //The render here DOES finish the whole pipe, so GL_FALSE should be returned for success.
+ r700SetScissor(context);
+ r700SetupVertexProgram(ctx);
+ r700SetupFragmentProgram(ctx);
+ r700UpdateShaderStates(ctx);
+
+ GLuint emit_end = r700PredictRenderSize(ctx, prim, ib, nr_prims)
+ + context->radeon.cmdbuf.cs->cdw;
+
+ r700SetupIndexBuffer(ctx, ib);
+ r700SetupStreams(ctx, arrays, max_index + 1);
+
+ radeonEmitState(radeon);
+
+ radeon_debug_add_indent();
+ for (i = 0; i < nr_prims; ++i)
+ {
+ if (context->ind_buf.bo)
+ r700RunRenderPrimitive(ctx,
+ prim[i].start,
+ prim[i].start + prim[i].count,
+ prim[i].mode,
+ prim[i].basevertex);
+ else
+ r700RunRenderPrimitiveImmediate(ctx,
+ prim[i].start,
+ prim[i].start + prim[i].count,
+ prim[i].mode);
+ }
+ radeon_debug_remove_indent();
+
+ /* Flush render op cached for last several quads. */
+ /* XXX drm should handle this in fence submit */
+ r700WaitForIdleClean(context);
+
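+    /* flush the color buffer cache; bit (id + 6) is CBn_DEST_BASE_ENA */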
+ rrb = radeon_get_colorbuffer(&context->radeon);
+ if (rrb && rrb->bo)
+ r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM,
+ CB_ACTION_ENA_bit | (1 << (id + 6)));
+
+ rrb = radeon_get_depthbuffer(&context->radeon);
+ if (rrb && rrb->bo)
+ r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM,
+ DB_ACTION_ENA_bit | DB_DEST_BASE_ENA_bit);
+
+ r700FreeData(ctx);
+
+ if (emit_end < context->radeon.cmdbuf.cs->cdw)
+ {
+	WARN_ONCE("Rendering was %d dwords larger than the predicted size."
+		  " We might overflow the command buffer.\n", context->radeon.cmdbuf.cs->cdw - emit_end);
+ }
+
+ return GL_TRUE;
}
-const struct tnl_pipeline_stage _r700_render_stage = {
- "r700 Hardware Rasterization",
- NULL,
- NULL,
- NULL,
- NULL,
- r700RunNonTCLRender
-};
-
-const struct tnl_pipeline_stage _r700_tcl_stage = {
- "r700 Hardware Transform, Clipping and Lighting",
- NULL,
- NULL,
- NULL,
- NULL,
- r700RunTCLRender
-};
-
-const struct tnl_pipeline_stage *r700_pipeline[] =
+static void r700DrawPrims(struct gl_context *ctx,
+ const struct gl_client_array *arrays[],
+ const struct _mesa_prim *prim,
+ GLuint nr_prims,
+ const struct _mesa_index_buffer *ib,
+ GLboolean index_bounds_valid,
+ GLuint min_index,
+ GLuint max_index)
{
- &_r700_tcl_stage,
- &_tnl_vertex_transform_stage,
- &_tnl_normal_transform_stage,
- &_tnl_lighting_stage,
- &_tnl_fog_coordinate_stage,
- &_tnl_texgen_stage,
- &_tnl_texture_transform_stage,
- &_tnl_vertex_program_stage,
-
- &_r700_render_stage,
- &_tnl_render_stage,
- 0,
-};
+ GLboolean retval = GL_FALSE;
+
+ context_t *context = R700_CONTEXT(ctx);
+ radeonContextPtr radeon = &context->radeon;
+ radeon_prepare_render(radeon);
+
+ /* This check should get folded into just the places that
+ * min/max index are really needed.
+ */
+
+ if (!vbo_all_varyings_in_vbos(arrays)) {
+ if (!index_bounds_valid)
+ vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index);
+	/* rebase to a zero min index to minimize the
+	 * amount of data to upload */
+ if (min_index) {
+ vbo_rebase_prims( ctx, arrays, prim, nr_prims, ib, min_index, max_index, r700DrawPrims );
+ return;
+ }
+ }
+ /* Make an attempt at drawing */
+ retval = r700TryDrawPrims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
+
+ /* If failed run tnl pipeline - it should take care of fallbacks */
+ if (!retval) {
+ _swsetup_Wakeup(ctx);
+ _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
+ }
+}
+
+void r700InitDraw(struct gl_context *ctx)
+{
+ struct vbo_context *vbo = vbo_context(ctx);
+
+    /* hook our draw function into the vbo module */
+ vbo->draw_prims = r700DrawPrims;
+}