X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fr600%2Fr700_render.c;h=c345b9d8ac9b8075e811aaa8f68cd52af495bf74;hb=898de4a9d5e47ed32c600e5907476fd9338aa7e9;hp=d64e921bdad8bee72ffea85700e0d2efe9d59b7a;hpb=ce7ed63f0c2f5cb1c030f73a5d8f6544eaac0f81;p=mesa.git

diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c
index d64e921bdad..c345b9d8ac9 100644
--- a/src/mesa/drivers/dri/r600/r700_render.c
+++ b/src/mesa/drivers/dri/r600/r700_render.c
@@ -43,6 +43,7 @@
 #include "tnl/t_context.h"
 #include "tnl/t_vertex.h"
 #include "tnl/t_pipeline.h"
+#include "vbo/vbo_context.h"
 
 #include "r600_context.h"
 #include "r600_cmdbuf.h"
@@ -53,6 +54,7 @@
 #include "r700_fragprog.h"
 #include "r700_state.h"
 
+#include "radeon_buffer_objects.h"
 #include "radeon_common_context.h"
 
 void r700WaitForIdle(context_t *context);
@@ -69,6 +71,7 @@ GLboolean r700SyncSurf(context_t *context,
 void r700WaitForIdle(context_t *context)
 {
        BATCH_LOCALS(&context->radeon);
+    radeon_print(RADEON_RENDER | RADEON_STATE, RADEON_TRACE, "%s\n", __func__);
 
        BEGIN_BATCH_NO_AUTOSTATE(3);
        R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
@@ -82,6 +85,7 @@ void r700WaitForIdle(context_t *context)
 void r700WaitForIdleClean(context_t *context)
 {
        BATCH_LOCALS(&context->radeon);
+    radeon_print(RADEON_RENDER | RADEON_STATE, RADEON_TRACE, "%s\n", __func__);
 
        BEGIN_BATCH_NO_AUTOSTATE(5);
        R600_OUT_BATCH(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
@@ -98,6 +102,7 @@ void r700WaitForIdleClean(context_t *context)
 void r700Start3D(context_t *context)
 {
        BATCH_LOCALS(&context->radeon);
+    radeon_print(RADEON_RENDER | RADEON_STATE, RADEON_TRACE, "%s\n", __func__);
        if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
        {
                BEGIN_BATCH_NO_AUTOSTATE(2);
@@ -124,6 +129,7 @@ GLboolean r700SyncSurf(context_t *context,
                        uint32_t sync_type)
 {
        BATCH_LOCALS(&context->radeon);
+    radeon_print(RADEON_RENDER | RADEON_STATE, RADEON_TRACE, "%s\n", __func__);
        uint32_t cp_coher_size;
 
        if (!pbo)
@@ -143,8 +149,7 @@ GLboolean r700SyncSurf(context_t *context,
        R600_OUT_BATCH_RELOC(0,
                             pbo,
                             0,
-                            read_domain, write_domain, 0); // ???
-
+                            read_domain, write_domain, 0);
        END_BATCH();
        COMMIT_BATCH();
 
@@ -246,106 +251,635 @@ static int r700NumVerts(int num_verts, int prim)
 static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim)
 {
-       context_t *context = R700_CONTEXT(ctx);
-       BATCH_LOCALS(&context->radeon);
-       int type, i, total_emit;
-       int num_indices;
-       uint32_t vgt_draw_initiator = 0;
-       uint32_t vgt_index_type = 0;
-       uint32_t vgt_primitive_type = 0;
-       uint32_t vgt_num_indices = 0;
-
-       type = r700PrimitiveType(prim);
-       num_indices = r700NumVerts(end - start, prim);
-
-       if (type < 0 || num_indices <= 0)
-               return;
+    context_t *context = R700_CONTEXT(ctx);
+    BATCH_LOCALS(&context->radeon);
+    int type, total_emit;
+    int num_indices;
+    uint32_t vgt_draw_initiator = 0;
+    uint32_t vgt_index_type = 0;
+    uint32_t vgt_primitive_type = 0;
+    uint32_t vgt_num_indices = 0;
+
+    type = r700PrimitiveType(prim);
+    num_indices = r700NumVerts(end - start, prim);
+
+    radeon_print(RADEON_RENDER, RADEON_TRACE,
+                 "%s type %x num_indices %d\n",
+                 __func__, type, num_indices);
+
+    if (type < 0 || num_indices <= 0)
+        return;
+
+    SETfield(vgt_primitive_type, type,
+             VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask);
+
+    SETfield(vgt_index_type, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
+
+    if(GL_TRUE != context->ind_buf.is_32bit)
+    {
+        SETfield(vgt_index_type, DI_INDEX_SIZE_16_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
+    }
+
+    vgt_num_indices = num_indices;
+    SETfield(vgt_draw_initiator, DI_SRC_SEL_DMA, SOURCE_SELECT_shift, SOURCE_SELECT_mask);
+    SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask);
+
+    total_emit =   3 /* VGT_PRIMITIVE_TYPE */
+                 + 2 /* VGT_INDEX_TYPE */
+                 + 2 /* NUM_INSTANCES */
+                 + 5 + 2; /* DRAW_INDEX */
+
+    BEGIN_BATCH_NO_AUTOSTATE(total_emit);
+    // prim
+    R600_OUT_BATCH_REGSEQ(VGT_PRIMITIVE_TYPE, 1);
+    R600_OUT_BATCH(vgt_primitive_type);
+    // index type
+    R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
+    R600_OUT_BATCH(vgt_index_type);
+    // num instances
+    R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
+    R600_OUT_BATCH(1);
+    // draw packet
+    R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX, 3));
+    R600_OUT_BATCH(context->ind_buf.bo_offset);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(vgt_num_indices);
+    R600_OUT_BATCH(vgt_draw_initiator);
+    R600_OUT_BATCH_RELOC(context->ind_buf.bo_offset,
+                         context->ind_buf.bo,
+                         context->ind_buf.bo_offset,
+                         RADEON_GEM_DOMAIN_GTT, 0, 0);
+    END_BATCH();
+    COMMIT_BATCH();
+}
+
+static void r700RunRenderPrimitiveImmediate(GLcontext * ctx, int start, int end, int prim)
+{
+    context_t *context = R700_CONTEXT(ctx);
+    BATCH_LOCALS(&context->radeon);
+    int type, i;
+    uint32_t num_indices, total_emit = 0;
+    uint32_t vgt_draw_initiator = 0;
+    uint32_t vgt_index_type = 0;
+    uint32_t vgt_primitive_type = 0;
+    uint32_t vgt_num_indices = 0;
+
+    type = r700PrimitiveType(prim);
+    num_indices = r700NumVerts(end - start, prim);
+
+    radeon_print(RADEON_RENDER, RADEON_TRACE,
+                 "%s type %x num_indices %d\n",
+                 __func__, type, num_indices);
+
+    if (type < 0 || num_indices <= 0)
+        return;
 
-       total_emit =   3 /* VGT_PRIMITIVE_TYPE */
-                    + 2 /* VGT_INDEX_TYPE */
-                    + 2 /* NUM_INSTANCES */
-                    + num_indices + 3; /* DRAW_INDEX_IMMD */
-
-       BEGIN_BATCH_NO_AUTOSTATE(total_emit);
-       // prim
-       SETfield(vgt_primitive_type, type,
-                VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask);
-       R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
-       R600_OUT_BATCH(mmVGT_PRIMITIVE_TYPE - ASIC_CONFIG_BASE_INDEX);
-       R600_OUT_BATCH(vgt_primitive_type);
-
-       // index type
-       SETfield(vgt_index_type, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
-       R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
-       R600_OUT_BATCH(vgt_index_type);
-
-       // num instances
-       R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
-       R600_OUT_BATCH(1);
-
-       // draw packet
-       vgt_num_indices = num_indices;
-       SETfield(vgt_draw_initiator, DI_SRC_SEL_IMMEDIATE, SOURCE_SELECT_shift, SOURCE_SELECT_mask);
-       SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask);
-
-       R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (num_indices + 1)));
+    SETfield(vgt_primitive_type, type,
+             VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask);
+
+    if (num_indices > 0xffff)
+    {
+        SETfield(vgt_index_type, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
+    }
+    else
+    {
+        SETfield(vgt_index_type, DI_INDEX_SIZE_16_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
+    }
+
+    vgt_num_indices = num_indices;
+    SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask);
+
+    if (start == 0)
+    {
+        SETfield(vgt_draw_initiator, DI_SRC_SEL_AUTO_INDEX, SOURCE_SELECT_shift, SOURCE_SELECT_mask);
+    }
+    else
+    {
+        if (num_indices > 0xffff)
+        {
+            total_emit += num_indices;
+        }
+        else
+        {
+            total_emit += (num_indices + 1) / 2;
+        }
+        SETfield(vgt_draw_initiator, DI_SRC_SEL_IMMEDIATE, SOURCE_SELECT_shift, SOURCE_SELECT_mask);
+    }
+
+    total_emit +=   3 /* VGT_PRIMITIVE_TYPE */
+                  + 2 /* VGT_INDEX_TYPE */
+                  + 2 /* NUM_INSTANCES */
+                  + 3; /* DRAW */
+
+    BEGIN_BATCH_NO_AUTOSTATE(total_emit);
+    // prim
+    R600_OUT_BATCH_REGSEQ(VGT_PRIMITIVE_TYPE, 1);
+    R600_OUT_BATCH(vgt_primitive_type);
+    // index type
+    R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
+    R600_OUT_BATCH(vgt_index_type);
+    // num instances
+    R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
+    R600_OUT_BATCH(1);
+    // draw packet
+    if(start == 0)
+    {
+        R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1));
        R600_OUT_BATCH(vgt_num_indices);
        R600_OUT_BATCH(vgt_draw_initiator);
+    }
+    else
+    {
+        if (num_indices > 0xffff)
+        {
+            R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (num_indices + 1)));
+            R600_OUT_BATCH(vgt_num_indices);
+            R600_OUT_BATCH(vgt_draw_initiator);
+            for (i = start; i < (start + num_indices); i++)
+            {
+                R600_OUT_BATCH(i);
+            }
+        }
+        else
+        {
+            R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (((num_indices + 1) / 2) + 1)));
+            R600_OUT_BATCH(vgt_num_indices);
+            R600_OUT_BATCH(vgt_draw_initiator);
+            for (i = start; i < (start + num_indices); i += 2)
+            {
+                if ((i + 1) == (start + num_indices))
+                {
+                    R600_OUT_BATCH(i);
+                }
+                else
+                {
+                    R600_OUT_BATCH(((i + 1) << 16) | (i));
+                }
+            }
+        }
+    }
 
-       for (i = start; i < (start + num_indices); i++) {
-               R600_OUT_BATCH(i);
-       }
-       END_BATCH();
-       COMMIT_BATCH();
-
+    END_BATCH();
+    COMMIT_BATCH();
 }
 
 /* start 3d, idle, cb/db flush */
 #define PRE_EMIT_STATE_BUFSZ 10 + 5 + 14
 
-static GLuint r700PredictRenderSize(GLcontext* ctx)
+static GLuint r700PredictRenderSize(GLcontext* ctx,
+                                    const struct _mesa_prim *prim,
+                                    const struct _mesa_index_buffer *ib,
+                                    GLuint nr_prims)
 {
        context_t *context = R700_CONTEXT(ctx);
-       TNLcontext *tnl = TNL_CONTEXT(ctx);
-       struct r700_vertex_program *vpc
-               = (struct r700_vertex_program *)ctx->VertexProgram._Current;
-       struct vertex_buffer *vb = &tnl->vb;
        GLboolean flushed;
        GLuint dwords, i;
        GLuint state_size;
-       /* pre calculate aos count so state prediction works */
-       context->radeon.tcl.aos_count = _mesa_bitcount(vpc->mesa_program.Base.InputsRead);
 
        dwords = PRE_EMIT_STATE_BUFSZ;
-       for (i = 0; i < vb->PrimitiveCount; i++)
-               dwords += vb->Primitive[i].count + 10;
+    if (ib)
+        dwords += nr_prims * 14;
+    else {
+        for (i = 0; i < nr_prims; ++i)
+        {
+            if (prim[i].start == 0)
+                dwords += 10;
+            else if (prim[i].count > 0xffff)
+                dwords += prim[i].count + 10;
+            else
+                dwords += ((prim[i].count + 1) / 2) + 10;
+        }
+    }
+
        state_size = radeonCountStateEmitSize(&context->radeon);
        flushed = rcommonEnsureCmdBufSpace(&context->radeon,
-                       dwords + state_size, __FUNCTION__);
-
+                                          dwords + state_size,
+                                          __FUNCTION__);
        if (flushed)
-               dwords += radeonCountStateEmitSize(&context->radeon);
+        dwords += radeonCountStateEmitSize(&context->radeon);
        else
-               dwords += state_size;
+        dwords += state_size;
 
-       if (RADEON_DEBUG & DEBUG_PRIMS)
-               fprintf(stderr, "%s: total prediction size is %d.\n", __FUNCTION__, dwords);
+    radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s: total prediction size is %d.\n", __FUNCTION__, dwords);
 
        return dwords;
+
+}
+
+#define CONVERT( TYPE, MACRO ) do {             \
+    GLuint i, j, sz;                            \
+    sz = input->Size;                           \
+    if (input->Normalized) {                    \
+        for (i = 0; i < count; i++) {           \
+            const TYPE *in = (TYPE *)src_ptr;   \
+            for (j = 0; j < sz; j++) {          \
+                *dst_ptr++ = MACRO(*in);        \
+                in++;                           \
+            }                                   \
+            src_ptr += stride;                  \
+        }                                       \
+    } else {                                    \
+        for (i = 0; i < count; i++) {           \
+            const TYPE *in = (TYPE *)src_ptr;   \
+            for (j = 0; j < sz; j++) {          \
+                *dst_ptr++ = (GLfloat)(*in);    \
+                in++;                           \
+            }                                   \
+            src_ptr += stride;                  \
+        }                                       \
+    }                                           \
+} while (0)
+
+/**
+ * Convert attribute data type to float
+ * If the attribute uses named buffer object replace the bo with newly allocated bo
+ */
+static void r700ConvertAttrib(GLcontext *ctx, int count,
+                              const struct gl_client_array *input,
+                              struct StreamDesc *attr)
+{
+    context_t *context = R700_CONTEXT(ctx);
+    const GLvoid *src_ptr;
+    GLboolean mapped_named_bo = GL_FALSE;
+    GLfloat *dst_ptr;
+    GLuint stride;
+
+    stride = (input->StrideB == 0) ? getTypeSize(input->Type) * input->Size : input->StrideB;
+
+    /* Convert value for first element only */
+    if (input->StrideB == 0)
+    {
+        count = 1;
+    }
+
+    if (input->BufferObj->Name)
+    {
+        if (!input->BufferObj->Pointer)
+        {
+            ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
+            mapped_named_bo = GL_TRUE;
+        }
+
+        src_ptr = ADD_POINTERS(input->BufferObj->Pointer, input->Ptr);
+    }
+    else
+    {
+        src_ptr = input->Ptr;
+    }
+
+    radeonAllocDmaRegion(&context->radeon, &attr->bo, &attr->bo_offset,
+                         sizeof(GLfloat) * input->Size * count, 32);
+    dst_ptr = (GLfloat *)ADD_POINTERS(attr->bo->ptr, attr->bo_offset);
+
+    assert(src_ptr != NULL);
+
+    switch (input->Type)
+    {
+    case GL_DOUBLE:
+        CONVERT(GLdouble, (GLfloat));
+        break;
+    case GL_UNSIGNED_INT:
+        CONVERT(GLuint, UINT_TO_FLOAT);
+        break;
+    case GL_INT:
+        CONVERT(GLint, INT_TO_FLOAT);
+        break;
+    case GL_UNSIGNED_SHORT:
+        CONVERT(GLushort, USHORT_TO_FLOAT);
+        break;
+    case GL_SHORT:
+        CONVERT(GLshort, SHORT_TO_FLOAT);
+        break;
+    case GL_UNSIGNED_BYTE:
+        assert(input->Format != GL_BGRA);
+        CONVERT(GLubyte, UBYTE_TO_FLOAT);
+        break;
+    case GL_BYTE:
+        CONVERT(GLbyte, BYTE_TO_FLOAT);
+        break;
+    default:
+        assert(0);
+        break;
+    }
+
+    if (mapped_named_bo)
+    {
+        ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
+    }
 }
 
-static GLboolean r700RunRender(GLcontext * ctx,
-                               struct tnl_pipeline_stage *stage)
+static void r700AlignDataToDword(GLcontext *ctx,
+                                 const struct gl_client_array *input,
+                                 int count,
+                                 struct StreamDesc *attr)
+{
+    context_t *context = R700_CONTEXT(ctx);
+    const int dst_stride = (input->StrideB + 3) & ~3;
+    const int size = getTypeSize(input->Type) * input->Size * count;
+    GLboolean mapped_named_bo = GL_FALSE;
+
+    radeonAllocDmaRegion(&context->radeon, &attr->bo, &attr->bo_offset, size, 32);
+
+    if (!input->BufferObj->Pointer)
+    {
+        ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
+        mapped_named_bo = GL_TRUE;
+    }
+
+    {
+        GLvoid *src_ptr = ADD_POINTERS(input->BufferObj->Pointer, input->Ptr);
+        GLvoid *dst_ptr = ADD_POINTERS(attr->bo->ptr, attr->bo_offset);
+        int i;
+
+        for (i = 0; i < count; ++i)
+        {
+            _mesa_memcpy(dst_ptr, src_ptr, input->StrideB);
+            src_ptr += input->StrideB;
+            dst_ptr += dst_stride;
+        }
+    }
+
+    if (mapped_named_bo)
+    {
+        ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
+    }
+
+    attr->stride = dst_stride;
+}
+
+static void r700SetupStreams(GLcontext *ctx, const struct gl_client_array *input[], int count)
+{
+    context_t *context = R700_CONTEXT(ctx);
+    GLuint stride;
+    int ret;
+    int i, index;
+
+    R600_STATECHANGE(context, vtx);
+
+    for(index = 0; index < context->nNumActiveAos; index++)
+    {
+        struct radeon_aos *aos = &context->radeon.tcl.aos[index];
+        i = context->stream_desc[index].element;
+
+        stride = (input[i]->StrideB == 0) ? getTypeSize(input[i]->Type) * input[i]->Size : input[i]->StrideB;
+
+        if (input[i]->Type == GL_DOUBLE || input[i]->Type == GL_UNSIGNED_INT || input[i]->Type == GL_INT ||
+#if MESA_BIG_ENDIAN
+            getTypeSize(input[i]->Type) != 4 ||
+#endif
+            stride < 4)
+        {
+            r700ConvertAttrib(ctx, count, input[i], &context->stream_desc[index]);
+        }
+        else
+        {
+            if (input[i]->BufferObj->Name)
+            {
+                if (stride % 4 != 0)
+                {
+                    assert(((intptr_t) input[i]->Ptr) % input[i]->StrideB == 0);
+                    r700AlignDataToDword(ctx, input[i], count, &context->stream_desc[index]);
+                    context->stream_desc[index].is_named_bo = GL_FALSE;
+                }
+                else
+                {
+                    context->stream_desc[index].stride = input[i]->StrideB;
+                    context->stream_desc[index].bo_offset = (intptr_t) input[i]->Ptr;
+                    context->stream_desc[index].bo = get_radeon_buffer_object(input[i]->BufferObj)->bo;
+                    context->stream_desc[index].is_named_bo = GL_TRUE;
+                }
+            }
+            else
+            {
+                int size;
+                int local_count = count;
+                uint32_t *dst;
+
+                if (input[i]->StrideB == 0)
+                {
+                    size = getTypeSize(input[i]->Type) * input[i]->Size;
+                    local_count = 1;
+                }
+                else
+                {
+                    size = getTypeSize(input[i]->Type) * input[i]->Size * local_count;
+                }
+
+                radeonAllocDmaRegion(&context->radeon, &context->stream_desc[index].bo,
+                                     &context->stream_desc[index].bo_offset, size, 32);
+                assert(context->stream_desc[index].bo->ptr != NULL);
+                dst = (uint32_t *)ADD_POINTERS(context->stream_desc[index].bo->ptr,
+                                               context->stream_desc[index].bo_offset);
+
+                switch (context->stream_desc[index].dwords)
+                {
+                case 1:
+                    radeonEmitVec4(dst, input[i]->Ptr, input[i]->StrideB, local_count);
+                    break;
+                case 2:
+                    radeonEmitVec8(dst, input[i]->Ptr, input[i]->StrideB, local_count);
+                    break;
+                case 3:
+                    radeonEmitVec12(dst, input[i]->Ptr, input[i]->StrideB, local_count);
+                    break;
+                case 4:
+                    radeonEmitVec16(dst, input[i]->Ptr, input[i]->StrideB, local_count);
+                    break;
+                default:
+                    assert(0);
+                    break;
+                }
+            }
+        }
+
+        aos->count = context->stream_desc[index].stride == 0 ? 1 : count;
+        aos->stride = context->stream_desc[index].stride / sizeof(float);
+        aos->components = context->stream_desc[index].dwords;
+        aos->bo = context->stream_desc[index].bo;
+        aos->offset = context->stream_desc[index].bo_offset;
+
+        if(context->stream_desc[index].is_named_bo)
+        {
+            radeon_cs_space_add_persistent_bo(context->radeon.cmdbuf.cs,
+                                              context->stream_desc[index].bo,
+                                              RADEON_GEM_DOMAIN_GTT, 0);
+        }
+    }
+
+    ret = radeon_cs_space_check_with_bo(context->radeon.cmdbuf.cs,
+                                        first_elem(&context->radeon.dma.reserved)->bo,
+                                        RADEON_GEM_DOMAIN_GTT, 0);
+}
+
+static void r700FreeData(GLcontext *ctx)
+{
+    /* Need to zero tcl.aos[n].bo and tcl.elt_dma_bo
+     * to prevent double unref in radeonReleaseArrays
+     * called during context destroy
+     */
+    context_t *context = R700_CONTEXT(ctx);
+
+    int i;
+
+    for (i = 0; i < context->nNumActiveAos; i++)
+    {
+        if (!context->stream_desc[i].is_named_bo)
+        {
+            radeon_bo_unref(context->stream_desc[i].bo);
+        }
+        context->radeon.tcl.aos[i].bo = NULL;
+    }
+
+    if (context->ind_buf.bo != NULL)
+    {
+        radeon_bo_unref(context->ind_buf.bo);
+    }
+}
+
+static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf)
+{
+    context_t *context = R700_CONTEXT(ctx);
+    GLvoid *src_ptr;
+    GLuint *out;
+    int i;
+    GLboolean mapped_named_bo = GL_FALSE;
+
+    if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer)
+    {
+        ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+        mapped_named_bo = GL_TRUE;
+        assert(mesa_ind_buf->obj->Pointer != NULL);
+    }
+    src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr);
+
+    if (mesa_ind_buf->type == GL_UNSIGNED_BYTE)
+    {
+        GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1);
+        GLubyte *in = (GLubyte *)src_ptr;
+
+        radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo,
+                             &context->ind_buf.bo_offset, size, 4);
+
+        assert(context->ind_buf.bo->ptr != NULL);
+        out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset);
+
+        for (i = 0; i + 1 < mesa_ind_buf->count; i += 2)
+        {
+            *out++ = in[i] | in[i + 1] << 16;
+        }
+
+        if (i < mesa_ind_buf->count)
+        {
+            *out++ = in[i];
+        }
+
+#if MESA_BIG_ENDIAN
+    }
+    else
+    { /* if (mesa_ind_buf->type == GL_UNSIGNED_SHORT) */
+        GLushort *in = (GLushort *)src_ptr;
+        GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1);
+
+        radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo,
+                             &context->ind_buf.bo_offset, size, 4);
+
+        assert(context->ind_buf.bo->ptr != NULL);
+        out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset);
+
+        for (i = 0; i + 1 < mesa_ind_buf->count; i += 2)
+        {
+            *out++ = in[i] | in[i + 1] << 16;
+        }
+
+        if (i < mesa_ind_buf->count)
+        {
+            *out++ = in[i];
+        }
+#endif
+    }
+
+    context->ind_buf.is_32bit = GL_FALSE;
+    context->ind_buf.count = mesa_ind_buf->count;
+
+    if (mapped_named_bo)
+    {
+        ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
+    }
+}
+
+static void r700SetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf)
+{
+    context_t *context = R700_CONTEXT(ctx);
+
+    if (!mesa_ind_buf) {
+        context->ind_buf.bo = NULL;
+        return;
+    }
+
+#if MESA_BIG_ENDIAN
+    if (mesa_ind_buf->type == GL_UNSIGNED_INT)
+    {
+#else
+    if (mesa_ind_buf->type != GL_UNSIGNED_BYTE)
+    {
+#endif
+        const GLvoid *src_ptr;
+        GLvoid *dst_ptr;
+        GLboolean mapped_named_bo = GL_FALSE;
+
+        if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer)
+        {
+            ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+            assert(mesa_ind_buf->obj->Pointer != NULL);
+            mapped_named_bo = GL_TRUE;
+        }
+
+        src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr);
+
+        const GLuint size = mesa_ind_buf->count * getTypeSize(mesa_ind_buf->type);
+
+        radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo,
+                             &context->ind_buf.bo_offset, size, 4);
+        assert(context->ind_buf.bo->ptr != NULL);
+        dst_ptr = ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset);
+
+        _mesa_memcpy(dst_ptr, src_ptr, size);
+
+        context->ind_buf.is_32bit = (mesa_ind_buf->type == GL_UNSIGNED_INT);
+        context->ind_buf.count = mesa_ind_buf->count;
+
+        if (mapped_named_bo)
+        {
+            ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
+        }
+    }
+    else
+    {
+        r700FixupIndexBuffer(ctx, mesa_ind_buf);
+    }
+}
+
+static GLboolean r700TryDrawPrims(GLcontext *ctx,
+                                  const struct gl_client_array *arrays[],
+                                  const struct _mesa_prim *prim,
+                                  GLuint nr_prims,
+                                  const struct _mesa_index_buffer *ib,
+                                  GLuint min_index,
+                                  GLuint max_index )
 {
        context_t *context = R700_CONTEXT(ctx);
        radeonContextPtr radeon = &context->radeon;
-       unsigned int i, id = 0;
-       TNLcontext *tnl = TNL_CONTEXT(ctx);
-       struct vertex_buffer *vb = &tnl->vb;
+    GLuint i, id = 0;
        struct radeon_renderbuffer *rrb;
 
-       if (RADEON_DEBUG & DEBUG_PRIMS)
-               fprintf(stderr, "%s: cs begin at %d\n",
-                       __func__, context->radeon.cmdbuf.cs->cdw);
+    if (ctx->NewState)
+        _mesa_update_state( ctx );
+
+    _tnl_UpdateFixedFunctionProgram(ctx);
+    r700SetVertexFormat(ctx, arrays, max_index + 1);
+    /* shaders need to be updated before buffers are validated */
+    r700UpdateShaders(ctx);
+    if (!r600ValidateBuffers(ctx))
+        return GL_FALSE;
 
        /* always emit CB base to prevent
         * lock ups on some chips.
@@ -354,25 +888,34 @@ static GLboolean r700RunRender(GLcontext * ctx,
        /* mark vtx as dirty since it changes per-draw */
        R600_STATECHANGE(context, vtx);
 
-       r700UpdateShaders(ctx);
        r700SetScissor(context);
        r700SetupVertexProgram(ctx);
        r700SetupFragmentProgram(ctx);
        r600UpdateTextureState(ctx);
 
-       GLuint emit_end = r700PredictRenderSize(ctx)
-               + context->radeon.cmdbuf.cs->cdw;
-       r700SetupStreams(ctx);
+    GLuint emit_end = r700PredictRenderSize(ctx, prim, ib, nr_prims)
+                    + context->radeon.cmdbuf.cs->cdw;
+
+    r700SetupIndexBuffer(ctx, ib);
+    r700SetupStreams(ctx, arrays, max_index + 1);
 
        radeonEmitState(radeon);
 
-       /* richard test code */
-       for (i = 0; i < vb->PrimitiveCount; i++) {
-               GLuint prim = _tnl_translate_prim(&vb->Primitive[i]);
-               GLuint start = vb->Primitive[i].start;
-               GLuint end = vb->Primitive[i].start + vb->Primitive[i].count;
-               r700RunRenderPrimitive(ctx, start, end, prim);
+    radeon_debug_add_indent();
+    for (i = 0; i < nr_prims; ++i)
+    {
+        if (context->ind_buf.bo)
+            r700RunRenderPrimitive(ctx,
+                                   prim[i].start,
+                                   prim[i].start + prim[i].count,
+                                   prim[i].mode);
+        else
+            r700RunRenderPrimitiveImmediate(ctx,
+                                            prim[i].start,
+                                            prim[i].start + prim[i].count,
+                                            prim[i].mode);
        }
+    radeon_debug_remove_indent();
 
        /* Flush render op cached for last several quads. */
        r700WaitForIdleClean(context);
@@ -387,81 +930,54 @@ static GLboolean r700RunRender(GLcontext * ctx,
        r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM,
                     DB_ACTION_ENA_bit | DB_DEST_BASE_ENA_bit);
 
-       radeonReleaseArrays(ctx, ~0);
-
-       if (RADEON_DEBUG & DEBUG_PRIMS)
-               fprintf(stderr, "%s: cs end at %d\n",
-                       __func__, context->radeon.cmdbuf.cs->cdw);
-
-       if ( emit_end < context->radeon.cmdbuf.cs->cdw )
-               WARN_ONCE("Rendering was %d commands larger than predicted size."
- " We might overflow command buffer.\n", context->radeon.cmdbuf.cs->cdw - emit_end); + r700FreeData(ctx); - return GL_FALSE; -} + if (emit_end < context->radeon.cmdbuf.cs->cdw) + { + WARN_ONCE("Rendering was %d commands larger than predicted size." + " We might overflow command buffer.\n", context->radeon.cmdbuf.cs->cdw - emit_end); + } -static GLboolean r700RunNonTCLRender(GLcontext * ctx, - struct tnl_pipeline_stage *stage) /* -------------------- */ -{ - GLboolean bRet = GL_TRUE; - - return bRet; + return GL_TRUE; } -static GLboolean r700RunTCLRender(GLcontext * ctx, /*----------------------*/ - struct tnl_pipeline_stage *stage) +static void r700DrawPrims(GLcontext *ctx, + const struct gl_client_array *arrays[], + const struct _mesa_prim *prim, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLboolean index_bounds_valid, + GLuint min_index, + GLuint max_index) { - GLboolean bRet = GL_FALSE; + GLboolean retval = GL_FALSE; - /* TODO : sw fallback */ + /* This check should get folded into just the places that + * min/max index are really needed. + */ + if (!index_bounds_valid) { + vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index); + } - /** - * Ensure all enabled and complete textures are uploaded along with any buffers being used. - */ - if(!r600ValidateBuffers(ctx)) - { - return GL_TRUE; - } + if (min_index) { + vbo_rebase_prims( ctx, arrays, prim, nr_prims, ib, min_index, max_index, r700DrawPrims ); + return; + } - bRet = r700RunRender(ctx, stage); + /* Make an attempt at drawing */ + retval = r700TryDrawPrims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); - return bRet; - //GL_FALSE will stop to do other pipe stage in _tnl_run_pipeline - //The render here DOES finish the whole pipe, so GL_FALSE should be returned for success. + /* If failed run tnl pipeline - it should take care of fallbacks */ + if (!retval) + _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); } -const struct tnl_pipeline_stage _r700_render_stage = { - "r700 Hardware Rasterization", - NULL, - NULL, - NULL, - NULL, - r700RunNonTCLRender -}; - -const struct tnl_pipeline_stage _r700_tcl_stage = { - "r700 Hardware Transform, Clipping and Lighting", - NULL, - NULL, - NULL, - NULL, - r700RunTCLRender -}; - -const struct tnl_pipeline_stage *r700_pipeline[] = +void r700InitDraw(GLcontext *ctx) { - &_r700_tcl_stage, - &_tnl_vertex_transform_stage, - &_tnl_normal_transform_stage, - &_tnl_lighting_stage, - &_tnl_fog_coordinate_stage, - &_tnl_texgen_stage, - &_tnl_texture_transform_stage, - &_tnl_vertex_program_stage, - - &_r700_render_stage, - &_tnl_render_stage, - 0, -}; + struct vbo_context *vbo = vbo_context(ctx); + + /* to be enabled */ + vbo->draw_prims = r700DrawPrims; +}