diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c
index 4f39d9f1bdf..0f7a7a46b71 100644
--- a/src/mesa/drivers/dri/r600/r700_render.c
+++ b/src/mesa/drivers/dri/r600/r700_render.c
@@ -42,7 +42,6 @@
 #include "tnl/t_vp_build.h"
 #include "tnl/t_context.h"
 #include "tnl/t_vertex.h"
-#include "tnl/t_pipeline.h"
 #include "vbo/vbo_context.h"
 #include "r600_context.h"
@@ -59,9 +58,7 @@
 void r700WaitForIdle(context_t *context);
 void r700WaitForIdleClean(context_t *context);
-GLboolean r700SendTextureState(context_t *context);
 static unsigned int r700PrimitiveType(int prim);
-void r600UpdateTextureState(GLcontext * ctx);
 GLboolean r700SyncSurf(context_t *context,
 struct radeon_bo *pbo,
 uint32_t read_domain,
@@ -118,8 +115,6 @@ void r700Start3D(context_t *context)
 END_BATCH();
 COMMIT_BATCH();
-
- r700WaitForIdleClean(context);
 }
 GLboolean r700SyncSurf(context_t *context,
@@ -249,28 +244,17 @@ static int r700NumVerts(int num_verts, int prim)
 return num_verts - verts_off;
 }
-static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim)
+static void r700RunRenderPrimitive(struct gl_context * ctx, int start, int end,
+ int prim, GLint basevertex)
 {
 context_t *context = R700_CONTEXT(ctx);
 BATCH_LOCALS(&context->radeon);
- int type, i, total_emit;
+ int type, total_emit;
 int num_indices;
 uint32_t vgt_draw_initiator = 0;
 uint32_t vgt_index_type = 0;
 uint32_t vgt_primitive_type = 0;
 uint32_t vgt_num_indices = 0;
- TNLcontext *tnl = TNL_CONTEXT(ctx);
- struct vertex_buffer *vb = &tnl->vb;
- GLboolean bUseDrawIndex;
-
- if(NULL != context->ind_buf.bo)
- {
- bUseDrawIndex = GL_TRUE;
- }
- else
- {
- bUseDrawIndex = GL_FALSE;
- }
 type = r700PrimitiveType(prim);
 num_indices = r700NumVerts(end - start, prim);
@@ -282,266 +266,180 @@ static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim
 if (type < 0 || num_indices <= 0)
 return;
- if(GL_TRUE == bUseDrawIndex)
- {
- total_emit = 3 /* VGT_PRIMITIVE_TYPE */
- + 2 /* VGT_INDEX_TYPE */
- + 2 /* NUM_INSTANCES */
- + 5 + 2; /* DRAW_INDEX */
- }
- else
- {
- total_emit = 3 /* VGT_PRIMITIVE_TYPE */
- + 2 /* VGT_INDEX_TYPE */
- + 2 /* NUM_INSTANCES */
- + num_indices + 3; /* DRAW_INDEX_IMMD */
- }
-
- BEGIN_BATCH_NO_AUTOSTATE(total_emit);
- // prim
 SETfield(vgt_primitive_type, type,
 VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask);
- R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
- R600_OUT_BATCH(mmVGT_PRIMITIVE_TYPE - ASIC_CONFIG_BASE_INDEX);
- R600_OUT_BATCH(vgt_primitive_type);
- // index type
 SETfield(vgt_index_type, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
- if(GL_TRUE == bUseDrawIndex)
+ if(GL_TRUE != context->ind_buf.is_32bit)
 {
- if(GL_TRUE != context->ind_buf.is_32bit)
- {
 SETfield(vgt_index_type, DI_INDEX_SIZE_16_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
- }
 }
+ /* 16-bit indexes are packed in a 32-bit value */
+ SETfield(vgt_index_type,
+#if MESA_BIG_ENDIAN
+ VGT_DMA_SWAP_32_BIT,
+#else
+ VGT_DMA_SWAP_NONE,
+#endif
+ SWAP_MODE_shift, SWAP_MODE_mask);
+
+
+ vgt_num_indices = num_indices;
+ SETfield(vgt_draw_initiator, DI_SRC_SEL_DMA, SOURCE_SELECT_shift, SOURCE_SELECT_mask);
+ SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, 
MAJOR_MODE_mask); + + total_emit = 3 /* VGT_PRIMITIVE_TYPE */ + + 2 /* VGT_INDEX_TYPE */ + + 2 /* NUM_INSTANCES */ + + 4 /* VTX_BASE_VTX_LOC + VTX_START_INST_LOC */ + + 5 + 2; /* DRAW_INDEX */ + + BEGIN_BATCH_NO_AUTOSTATE(total_emit); + // prim + R600_OUT_BATCH_REGSEQ(VGT_PRIMITIVE_TYPE, 1); + R600_OUT_BATCH(vgt_primitive_type); + // index type R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0)); R600_OUT_BATCH(vgt_index_type); - // num instances R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0)); R600_OUT_BATCH(1); - + /* offset */ + R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 2)); + R600_OUT_BATCH(mmSQ_VTX_BASE_VTX_LOC - ASIC_CTL_CONST_BASE_INDEX); + R600_OUT_BATCH(basevertex); //VTX_BASE_VTX_LOC + R600_OUT_BATCH(0); //VTX_START_INST_LOC // draw packet - vgt_num_indices = num_indices; + R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX, 3)); + R600_OUT_BATCH(context->ind_buf.bo_offset); + R600_OUT_BATCH(0); + R600_OUT_BATCH(vgt_num_indices); + R600_OUT_BATCH(vgt_draw_initiator); + R600_OUT_BATCH_RELOC(context->ind_buf.bo_offset, + context->ind_buf.bo, + context->ind_buf.bo_offset, + RADEON_GEM_DOMAIN_GTT, 0, 0); + END_BATCH(); + COMMIT_BATCH(); +} + +static void r700RunRenderPrimitiveImmediate(struct gl_context * ctx, int start, int end, int prim) +{ + context_t *context = R700_CONTEXT(ctx); + BATCH_LOCALS(&context->radeon); + int type; + uint32_t num_indices, total_emit = 0; + uint32_t vgt_draw_initiator = 0; + uint32_t vgt_index_type = 0; + uint32_t vgt_primitive_type = 0; + uint32_t vgt_num_indices = 0; + + type = r700PrimitiveType(prim); + num_indices = r700NumVerts(end - start, prim); + + radeon_print(RADEON_RENDER, RADEON_TRACE, + "%s type %x num_indices %d\n", + __func__, type, num_indices); + + if (type < 0 || num_indices <= 0) + return; + + SETfield(vgt_primitive_type, type, + VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask); - if(GL_TRUE == bUseDrawIndex) + if (num_indices > 0xffff) { - SETfield(vgt_draw_initiator, DI_SRC_SEL_DMA, SOURCE_SELECT_shift, SOURCE_SELECT_mask); + SETfield(vgt_index_type, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask); } else { - SETfield(vgt_draw_initiator, DI_SRC_SEL_IMMEDIATE, SOURCE_SELECT_shift, SOURCE_SELECT_mask); + SETfield(vgt_index_type, DI_INDEX_SIZE_16_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask); } - SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask); + /* 16-bit indexes are packed in a 32-bit value */ + SETfield(vgt_index_type, +#if MESA_BIG_ENDIAN + VGT_DMA_SWAP_32_BIT, +#else + VGT_DMA_SWAP_NONE, +#endif + SWAP_MODE_shift, SWAP_MODE_mask); - if(GL_TRUE == bUseDrawIndex) - { - R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX, 3)); - R600_OUT_BATCH(context->ind_buf.bo_offset); - R600_OUT_BATCH(0); - R600_OUT_BATCH(vgt_num_indices); - R600_OUT_BATCH(vgt_draw_initiator); - R600_OUT_BATCH_RELOC(context->ind_buf.bo_offset, - context->ind_buf.bo, - context->ind_buf.bo_offset, - RADEON_GEM_DOMAIN_GTT, 0, 0); - } - else - { - R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (num_indices + 1))); - R600_OUT_BATCH(vgt_num_indices); - R600_OUT_BATCH(vgt_draw_initiator); + vgt_num_indices = num_indices; + SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask); - for (i = start; i < (start + num_indices); i++) - { - if(vb->Elts) - { - R600_OUT_BATCH(vb->Elts[i]); - } - else - { - R600_OUT_BATCH(i); - } - } - } + SETfield(vgt_draw_initiator, DI_SRC_SEL_AUTO_INDEX, SOURCE_SELECT_shift, SOURCE_SELECT_mask); + + total_emit += 3 /* VGT_PRIMITIVE_TYPE */ + + 2 /* 
VGT_INDEX_TYPE */ + + 2 /* NUM_INSTANCES */ + + 4 /* VTX_BASE_VTX_LOC + VTX_START_INST_LOC */ + + 3; /* DRAW */ + + BEGIN_BATCH_NO_AUTOSTATE(total_emit); + // prim + R600_OUT_BATCH_REGSEQ(VGT_PRIMITIVE_TYPE, 1); + R600_OUT_BATCH(vgt_primitive_type); + // index type + R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0)); + R600_OUT_BATCH(vgt_index_type); + // num instances + R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0)); + R600_OUT_BATCH(1); + /* offset */ + R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 2)); + R600_OUT_BATCH(mmSQ_VTX_BASE_VTX_LOC - ASIC_CTL_CONST_BASE_INDEX); + R600_OUT_BATCH(start); //VTX_BASE_VTX_LOC + R600_OUT_BATCH(0); //VTX_START_INST_LOC + // draw packet + + R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1)); + R600_OUT_BATCH(vgt_num_indices); + R600_OUT_BATCH(vgt_draw_initiator); END_BATCH(); COMMIT_BATCH(); } /* start 3d, idle, cb/db flush */ -#define PRE_EMIT_STATE_BUFSZ 10 + 5 + 14 +#define PRE_EMIT_STATE_BUFSZ 5 + 5 + 14 -static GLuint r700PredictRenderSize(GLcontext* ctx, GLuint nr_prims) +static GLuint r700PredictRenderSize(struct gl_context* ctx, + const struct _mesa_prim *prim, + const struct _mesa_index_buffer *ib, + GLuint nr_prims) { context_t *context = R700_CONTEXT(ctx); - struct r700_vertex_program *vp = context->selected_vp; GLboolean flushed; GLuint dwords, i; GLuint state_size; - /* pre calculate aos count so state prediction works */ - context->radeon.tcl.aos_count = _mesa_bitcount(vp->mesa_program->Base.InputsRead); dwords = PRE_EMIT_STATE_BUFSZ; - if (nr_prims) - dwords += nr_prims * 14; + if (ib) + dwords += nr_prims * 18; else { - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *vb = &tnl->vb; - - for (i = 0; i < vb->PrimitiveCount; i++) - dwords += vb->Primitive[i].count + 10; + for (i = 0; i < nr_prims; ++i) + { + dwords += 14; + } } + state_size = radeonCountStateEmitSize(&context->radeon); flushed = rcommonEnsureCmdBufSpace(&context->radeon, - dwords + state_size, __FUNCTION__); - + dwords + state_size, + __FUNCTION__); if (flushed) - dwords += radeonCountStateEmitSize(&context->radeon); + dwords += radeonCountStateEmitSize(&context->radeon); else - dwords += state_size; + dwords += state_size; - radeon_print(RADEON_RENDER, RADEON_VERBOSE, - "%s: total prediction size is %d.\n", __FUNCTION__, dwords); + radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s: total prediction size is %d.\n", __FUNCTION__, dwords); return dwords; -} - -static GLboolean r700RunRender(GLcontext * ctx, - struct tnl_pipeline_stage *stage) -{ - context_t *context = R700_CONTEXT(ctx); - radeonContextPtr radeon = &context->radeon; - unsigned int i, id = 0; - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *vb = &tnl->vb; - struct radeon_renderbuffer *rrb; - - radeon_print(RADEON_RENDER, RADEON_NORMAL, "%s: cs begin at %d\n", - __func__, context->radeon.cmdbuf.cs->cdw); - - /* always emit CB base to prevent - * lock ups on some chips. 
- */ - R600_STATECHANGE(context, cb_target); - /* mark vtx as dirty since it changes per-draw */ - R600_STATECHANGE(context, vtx); - - r700SetScissor(context); - r700SetupVertexProgram(ctx); - r700SetupFragmentProgram(ctx); - r600UpdateTextureState(ctx); - - GLuint emit_end = r700PredictRenderSize(ctx, 0) - + context->radeon.cmdbuf.cs->cdw; - r700SetupStreams(ctx); - - radeonEmitState(radeon); - - radeon_debug_add_indent(); - /* richard test code */ - for (i = 0; i < vb->PrimitiveCount; i++) { - GLuint prim = _tnl_translate_prim(&vb->Primitive[i]); - GLuint start = vb->Primitive[i].start; - GLuint end = vb->Primitive[i].start + vb->Primitive[i].count; - r700RunRenderPrimitive(ctx, start, end, prim); - } - radeon_debug_remove_indent(); - - /* Flush render op cached for last several quads. */ - r700WaitForIdleClean(context); - - rrb = radeon_get_colorbuffer(&context->radeon); - if (rrb && rrb->bo) - r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM, - CB_ACTION_ENA_bit | (1 << (id + 6))); - - rrb = radeon_get_depthbuffer(&context->radeon); - if (rrb && rrb->bo) - r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM, - DB_ACTION_ENA_bit | DB_DEST_BASE_ENA_bit); - - radeonReleaseArrays(ctx, ~0); - - radeon_print(RADEON_RENDER, RADEON_TRACE, "%s: cs end at %d\n", - __func__, context->radeon.cmdbuf.cs->cdw); - - if ( emit_end < context->radeon.cmdbuf.cs->cdw ) - WARN_ONCE("Rendering was %d commands larger than predicted size." - " We might overflow command buffer.\n", context->radeon.cmdbuf.cs->cdw - emit_end); - - return GL_FALSE; -} - -static GLboolean r700RunNonTCLRender(GLcontext * ctx, - struct tnl_pipeline_stage *stage) /* -------------------- */ -{ - GLboolean bRet = GL_TRUE; - - return bRet; -} - -static GLboolean r700RunTCLRender(GLcontext * ctx, /*----------------------*/ - struct tnl_pipeline_stage *stage) -{ - GLboolean bRet = GL_FALSE; - - /* TODO : sw fallback */ - - /* Need shader bo's setup before bo check */ - r700UpdateShaders(ctx); - /** - - * Ensure all enabled and complete textures are uploaded along with any buffers being used. - */ - if(!r600ValidateBuffers(ctx)) - { - return GL_TRUE; - } - - bRet = r700RunRender(ctx, stage); - return bRet; - //GL_FALSE will stop to do other pipe stage in _tnl_run_pipeline - //The render here DOES finish the whole pipe, so GL_FALSE should be returned for success. 
} -const struct tnl_pipeline_stage _r700_render_stage = { - "r700 Hardware Rasterization", - NULL, - NULL, - NULL, - NULL, - r700RunNonTCLRender -}; - -const struct tnl_pipeline_stage _r700_tcl_stage = { - "r700 Hardware Transform, Clipping and Lighting", - NULL, - NULL, - NULL, - NULL, - r700RunTCLRender -}; - -const struct tnl_pipeline_stage *r700_pipeline[] = -{ - &_r700_tcl_stage, - &_tnl_vertex_transform_stage, - &_tnl_normal_transform_stage, - &_tnl_lighting_stage, - &_tnl_fog_coordinate_stage, - &_tnl_texgen_stage, - &_tnl_texture_transform_stage, - &_tnl_vertex_program_stage, - - &_r700_render_stage, - &_tnl_render_stage, - 0, -}; - #define CONVERT( TYPE, MACRO ) do { \ GLuint i, j, sz; \ sz = input->Size; \ @@ -570,7 +468,7 @@ const struct tnl_pipeline_stage *r700_pipeline[] = * Convert attribute data type to float * If the attribute uses named buffer object replace the bo with newly allocated bo */ -static void r700ConvertAttrib(GLcontext *ctx, int count, +static void r700ConvertAttrib(struct gl_context *ctx, int count, const struct gl_client_array *input, struct StreamDesc *attr) { @@ -605,6 +503,9 @@ static void r700ConvertAttrib(GLcontext *ctx, int count, radeonAllocDmaRegion(&context->radeon, &attr->bo, &attr->bo_offset, sizeof(GLfloat) * input->Size * count, 32); + + radeon_bo_map(attr->bo, 1); + dst_ptr = (GLfloat *)ADD_POINTERS(attr->bo->ptr, attr->bo_offset); assert(src_ptr != NULL); @@ -638,13 +539,16 @@ static void r700ConvertAttrib(GLcontext *ctx, int count, break; } + radeon_bo_unmap(attr->bo); + if (mapped_named_bo) { ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj); } } -static void r700AlignDataToDword(GLcontext *ctx, +#if 0 /* unused */ +static void r700AlignDataToDword(struct gl_context *ctx, const struct gl_client_array *input, int count, struct StreamDesc *attr) @@ -656,6 +560,8 @@ static void r700AlignDataToDword(GLcontext *ctx, radeonAllocDmaRegion(&context->radeon, &attr->bo, &attr->bo_offset, size, 32); + radeon_bo_map(attr->bo, 1); + if (!input->BufferObj->Pointer) { ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj); @@ -669,12 +575,13 @@ static void r700AlignDataToDword(GLcontext *ctx, for (i = 0; i < count; ++i) { - _mesa_memcpy(dst_ptr, src_ptr, input->StrideB); + memcpy(dst_ptr, src_ptr, input->StrideB); src_ptr += input->StrideB; dst_ptr += dst_stride; } } + radeon_bo_unmap(attr->bo); if (mapped_named_bo) { ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj); @@ -682,8 +589,9 @@ static void r700AlignDataToDword(GLcontext *ctx, attr->stride = dst_stride; } +#endif -static void r700SetupStreams2(GLcontext *ctx, const struct gl_client_array *input[], int count) +static void r700SetupStreams(struct gl_context *ctx, const struct gl_client_array *input[], int count) { context_t *context = R700_CONTEXT(ctx); GLuint stride; @@ -699,31 +607,23 @@ static void r700SetupStreams2(GLcontext *ctx, const struct gl_client_array *inpu stride = (input[i]->StrideB == 0) ? 
getTypeSize(input[i]->Type) * input[i]->Size : input[i]->StrideB; - if (input[i]->Type == GL_DOUBLE || input[i]->Type == GL_UNSIGNED_INT || input[i]->Type == GL_INT || + if (input[i]->Type == GL_DOUBLE || input[i]->Type == GL_UNSIGNED_INT || input[i]->Type == GL_INT #if MESA_BIG_ENDIAN - getTypeSize(input[i]->Type) != 4 || + || getTypeSize(input[i]->Type) != 4 #endif - stride < 4) + ) { + assert(count); r700ConvertAttrib(ctx, count, input[i], &context->stream_desc[index]); } else { if (input[i]->BufferObj->Name) { - if (stride % 4 != 0) - { - assert(((intptr_t) input[i]->Ptr) % input[i]->StrideB == 0); - r700AlignDataToDword(ctx, input[i], count, &context->stream_desc[index]); - context->stream_desc[index].is_named_bo = GL_FALSE; - } - else - { - context->stream_desc[index].stride = input[i]->StrideB; - context->stream_desc[index].bo_offset = (intptr_t) input[i]->Ptr; - context->stream_desc[index].bo = get_radeon_buffer_object(input[i]->BufferObj)->bo; - context->stream_desc[index].is_named_bo = GL_TRUE; - } + context->stream_desc[index].stride = input[i]->StrideB; + context->stream_desc[index].bo_offset = (intptr_t) input[i]->Ptr; + context->stream_desc[index].bo = get_radeon_buffer_object(input[i]->BufferObj)->bo; + context->stream_desc[index].is_named_bo = GL_TRUE; } else { @@ -743,32 +643,33 @@ static void r700SetupStreams2(GLcontext *ctx, const struct gl_client_array *inpu radeonAllocDmaRegion(&context->radeon, &context->stream_desc[index].bo, &context->stream_desc[index].bo_offset, size, 32); + + radeon_bo_map(context->stream_desc[index].bo, 1); assert(context->stream_desc[index].bo->ptr != NULL); + + dst = (uint32_t *)ADD_POINTERS(context->stream_desc[index].bo->ptr, context->stream_desc[index].bo_offset); switch (context->stream_desc[index].dwords) { case 1: - radeonEmitVec4(dst, input[i]->Ptr, input[i]->StrideB, local_count); - context->stream_desc[index].stride = 4; + radeonEmitVec4(dst, input[i]->Ptr, input[i]->StrideB, local_count); break; case 2: radeonEmitVec8(dst, input[i]->Ptr, input[i]->StrideB, local_count); - context->stream_desc[index].stride = 8; break; case 3: radeonEmitVec12(dst, input[i]->Ptr, input[i]->StrideB, local_count); - context->stream_desc[index].stride = 12; break; case 4: radeonEmitVec16(dst, input[i]->Ptr, input[i]->StrideB, local_count); - context->stream_desc[index].stride = 16; break; default: assert(0); break; } + radeon_bo_unmap(context->stream_desc[index].bo); } } @@ -786,13 +687,12 @@ static void r700SetupStreams2(GLcontext *ctx, const struct gl_client_array *inpu } } - context->radeon.tcl.aos_count = context->nNumActiveAos; ret = radeon_cs_space_check_with_bo(context->radeon.cmdbuf.cs, first_elem(&context->radeon.dma.reserved)->bo, RADEON_GEM_DOMAIN_GTT, 0); } -static void r700FreeData(GLcontext *ctx) +static void r700FreeData(struct gl_context *ctx) { /* Need to zero tcl.aos[n].bo and tcl.elt_dma_bo * to prevent double unref in radeonReleaseArrays @@ -817,7 +717,7 @@ static void r700FreeData(GLcontext *ctx) } } -static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf) +static void r700FixupIndexBuffer(struct gl_context *ctx, const struct _mesa_index_buffer *mesa_ind_buf) { context_t *context = R700_CONTEXT(ctx); GLvoid *src_ptr; @@ -841,6 +741,7 @@ static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo, &context->ind_buf.bo_offset, size, 4); + radeon_bo_map(context->ind_buf.bo, 1); assert(context->ind_buf.bo->ptr != NULL); 
out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset); @@ -854,6 +755,7 @@ static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *out++ = in[i]; } + radeon_bo_unmap(context->ind_buf.bo); #if MESA_BIG_ENDIAN } else @@ -864,6 +766,7 @@ static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo, &context->ind_buf.bo_offset, size, 4); + radeon_bo_map(context->ind_buf.bo, 1); assert(context->ind_buf.bo->ptr != NULL); out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset); @@ -876,6 +779,7 @@ static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer { *out++ = in[i]; } + radeon_bo_unmap(context->ind_buf.bo); #endif } @@ -888,7 +792,7 @@ static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer } } -static void r700SetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf) +static void r700SetupIndexBuffer(struct gl_context *ctx, const struct _mesa_index_buffer *mesa_ind_buf) { context_t *context = R700_CONTEXT(ctx); @@ -899,11 +803,10 @@ static void r700SetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer #if MESA_BIG_ENDIAN if (mesa_ind_buf->type == GL_UNSIGNED_INT) - { #else if (mesa_ind_buf->type != GL_UNSIGNED_BYTE) - { #endif + { const GLvoid *src_ptr; GLvoid *dst_ptr; GLboolean mapped_named_bo = GL_FALSE; @@ -921,11 +824,13 @@ static void r700SetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo, &context->ind_buf.bo_offset, size, 4); + radeon_bo_map(context->ind_buf.bo, 1); assert(context->ind_buf.bo->ptr != NULL); dst_ptr = ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset); - _mesa_memcpy(dst_ptr, src_ptr, size); + memcpy(dst_ptr, src_ptr, size); + radeon_bo_unmap(context->ind_buf.bo); context->ind_buf.is_32bit = (mesa_ind_buf->type == GL_UNSIGNED_INT); context->ind_buf.count = mesa_ind_buf->count; @@ -940,13 +845,21 @@ static void r700SetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer } } -static GLboolean r700TryDrawPrims(GLcontext *ctx, - const struct gl_client_array *arrays[], - const struct _mesa_prim *prim, - GLuint nr_prims, - const struct _mesa_index_buffer *ib, - GLuint min_index, - GLuint max_index ) +static GLboolean check_fallbacks(struct gl_context *ctx) +{ + if (ctx->RenderMode != GL_RENDER) + return GL_TRUE; + + return GL_FALSE; +} + +static GLboolean r700TryDrawPrims(struct gl_context *ctx, + const struct gl_client_array *arrays[], + const struct _mesa_prim *prim, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLuint min_index, + GLuint max_index ) { context_t *context = R700_CONTEXT(ctx); radeonContextPtr radeon = &context->radeon; @@ -954,14 +867,15 @@ static GLboolean r700TryDrawPrims(GLcontext *ctx, struct radeon_renderbuffer *rrb; if (ctx->NewState) - { _mesa_update_state( ctx ); - } + + if (check_fallbacks(ctx)) + return GL_FALSE; _tnl_UpdateFixedFunctionProgram(ctx); r700SetVertexFormat(ctx, arrays, max_index + 1); /* shaders need to be updated before buffers are validated */ - r700UpdateShaders2(ctx); + r700UpdateShaders(ctx); if (!r600ValidateBuffers(ctx)) return GL_FALSE; @@ -975,27 +889,35 @@ static GLboolean r700TryDrawPrims(GLcontext *ctx, r700SetScissor(context); r700SetupVertexProgram(ctx); r700SetupFragmentProgram(ctx); - r600UpdateTextureState(ctx); + r700UpdateShaderStates(ctx); - GLuint emit_end = 
r700PredictRenderSize(ctx, nr_prims) + GLuint emit_end = r700PredictRenderSize(ctx, prim, ib, nr_prims) + context->radeon.cmdbuf.cs->cdw; r700SetupIndexBuffer(ctx, ib); - r700SetupStreams2(ctx, arrays, max_index + 1); + r700SetupStreams(ctx, arrays, max_index + 1); radeonEmitState(radeon); radeon_debug_add_indent(); for (i = 0; i < nr_prims; ++i) { - r700RunRenderPrimitive(ctx, - prim[i].start, - prim[i].start + prim[i].count, - prim[i].mode); + if (context->ind_buf.bo) + r700RunRenderPrimitive(ctx, + prim[i].start, + prim[i].start + prim[i].count, + prim[i].mode, + prim[i].basevertex); + else + r700RunRenderPrimitiveImmediate(ctx, + prim[i].start, + prim[i].start + prim[i].count, + prim[i].mode); } radeon_debug_remove_indent(); /* Flush render op cached for last several quads. */ + /* XXX drm should handle this in fence submit */ r700WaitForIdleClean(context); rrb = radeon_get_colorbuffer(&context->radeon); @@ -1019,62 +941,46 @@ static GLboolean r700TryDrawPrims(GLcontext *ctx, return GL_TRUE; } -static void r700DrawPrimsRe(GLcontext *ctx, - const struct gl_client_array *arrays[], - const struct _mesa_prim *prim, - GLuint nr_prims, - const struct _mesa_index_buffer *ib, - GLboolean index_bounds_valid, - GLuint min_index, - GLuint max_index) +static void r700DrawPrims(struct gl_context *ctx, + const struct gl_client_array *arrays[], + const struct _mesa_prim *prim, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLboolean index_bounds_valid, + GLuint min_index, + GLuint max_index) { - GLboolean retval = GL_FALSE; + GLboolean retval = GL_FALSE; + + context_t *context = R700_CONTEXT(ctx); + radeonContextPtr radeon = &context->radeon; + radeon_prepare_render(radeon); - /* This check should get folded into just the places that + /* This check should get folded into just the places that * min/max index are really needed. */ - if (!index_bounds_valid) { - vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index); - } - if (min_index) { - vbo_rebase_prims( ctx, arrays, prim, nr_prims, ib, min_index, max_index, r700DrawPrimsRe ); + if (!vbo_all_varyings_in_vbos(arrays)) { + if (!index_bounds_valid) + vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index); + /* do we want to rebase, minimizes the + * amount of data to upload? */ + if (min_index) { + vbo_rebase_prims( ctx, arrays, prim, nr_prims, ib, min_index, max_index, r700DrawPrims ); return; + } } - /* Make an attempt at drawing */ retval = r700TryDrawPrims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); /* If failed run tnl pipeline - it should take care of fallbacks */ - if (!retval) + if (!retval) { + _swsetup_Wakeup(ctx); _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); + } } -static void r700DrawPrims(GLcontext *ctx, - const struct gl_client_array *arrays[], - const struct _mesa_prim *prim, - GLuint nr_prims, - const struct _mesa_index_buffer *ib, - GLboolean index_bounds_valid, - GLuint min_index, - GLuint max_index) -{ - context_t *context = R700_CONTEXT(ctx); - - /* For non indexed drawing, using tnl pipe. */ - if(!ib) - { - context->ind_buf.bo = NULL; - - _tnl_vbo_draw_prims(ctx, arrays, prim, nr_prims, ib, - index_bounds_valid, min_index, max_index); - return; - } - - r700DrawPrimsRe(ctx, arrays, prim, nr_prims, ib, index_bounds_valid, min_index, max_index); -} - -void r700InitDraw(GLcontext *ctx) +void r700InitDraw(struct gl_context *ctx) { struct vbo_context *vbo = vbo_context(ctx);
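
Note on the new size prediction: with an index buffer, r700PredictRenderSize
now reserves 18 dwords per primitive, and 14 for the immediate (auto-index)
path. Those constants line up with the packets each render function emits; a
minimal accounting sketch follows (the helper below is illustrative only, not
part of the patch or the driver):

    /* Dword cost of one indexed draw, mirroring total_emit in
     * r700RunRenderPrimitive: each CP packet is a header dword plus its
     * payload, and DRAW_INDEX additionally carries a two-dword relocation
     * for the index buffer object.
     */
    static int indexed_draw_dwords(void)
    {
        return 3      /* VGT_PRIMITIVE_TYPE register write (REGSEQ)        */
             + 2      /* INDEX_TYPE packet                                 */
             + 2      /* NUM_INSTANCES packet                              */
             + 4      /* SET_CTL_CONST: base vertex + start instance      */
             + 5 + 2; /* DRAW_INDEX packet + index bo relocation           */
    }             /* = 18; the auto-index path is 3 + 2 + 2 + 4 + 3 = 14 */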