X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fr600%2Fr700_render.c;h=0f7a7a46b714eed05526a200b6634fe2dc63ff91;hb=424b1210d951c206e7c2fb8f2778acbd384eb247;hp=c345b9d8ac9b8075e811aaa8f68cd52af495bf74;hpb=9fce12b894c3af33d7a0732332446893682a48d5;p=mesa.git diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c index c345b9d8ac9..0f7a7a46b71 100644 --- a/src/mesa/drivers/dri/r600/r700_render.c +++ b/src/mesa/drivers/dri/r600/r700_render.c @@ -42,7 +42,6 @@ #include "tnl/t_vp_build.h" #include "tnl/t_context.h" #include "tnl/t_vertex.h" -#include "tnl/t_pipeline.h" #include "vbo/vbo_context.h" #include "r600_context.h" @@ -59,9 +58,7 @@ void r700WaitForIdle(context_t *context); void r700WaitForIdleClean(context_t *context); -GLboolean r700SendTextureState(context_t *context); static unsigned int r700PrimitiveType(int prim); -void r600UpdateTextureState(GLcontext * ctx); GLboolean r700SyncSurf(context_t *context, struct radeon_bo *pbo, uint32_t read_domain, @@ -118,8 +115,6 @@ void r700Start3D(context_t *context) END_BATCH(); COMMIT_BATCH(); - - r700WaitForIdleClean(context); } GLboolean r700SyncSurf(context_t *context, @@ -249,7 +244,8 @@ static int r700NumVerts(int num_verts, int prim) return num_verts - verts_off; } -static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim) +static void r700RunRenderPrimitive(struct gl_context * ctx, int start, int end, + int prim, GLint basevertex) { context_t *context = R700_CONTEXT(ctx); BATCH_LOCALS(&context->radeon); @@ -280,6 +276,16 @@ static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim SETfield(vgt_index_type, DI_INDEX_SIZE_16_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask); } + /* 16-bit indexes are packed in a 32-bit value */ + SETfield(vgt_index_type, +#if MESA_BIG_ENDIAN + VGT_DMA_SWAP_32_BIT, +#else + VGT_DMA_SWAP_NONE, +#endif + SWAP_MODE_shift, SWAP_MODE_mask); + + vgt_num_indices = num_indices; SETfield(vgt_draw_initiator, DI_SRC_SEL_DMA, SOURCE_SELECT_shift, SOURCE_SELECT_mask); SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask); @@ -287,6 +293,7 @@ static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim total_emit = 3 /* VGT_PRIMITIVE_TYPE */ + 2 /* VGT_INDEX_TYPE */ + 2 /* NUM_INSTANCES */ + + 4 /* VTX_BASE_VTX_LOC + VTX_START_INST_LOC */ + 5 + 2; /* DRAW_INDEX */ BEGIN_BATCH_NO_AUTOSTATE(total_emit); @@ -299,6 +306,11 @@ static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim // num instances R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0)); R600_OUT_BATCH(1); + /* offset */ + R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 2)); + R600_OUT_BATCH(mmSQ_VTX_BASE_VTX_LOC - ASIC_CTL_CONST_BASE_INDEX); + R600_OUT_BATCH(basevertex); //VTX_BASE_VTX_LOC + R600_OUT_BATCH(0); //VTX_START_INST_LOC // draw packet R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX, 3)); R600_OUT_BATCH(context->ind_buf.bo_offset); @@ -313,11 +325,11 @@ static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim COMMIT_BATCH(); } -static void r700RunRenderPrimitiveImmediate(GLcontext * ctx, int start, int end, int prim) +static void r700RunRenderPrimitiveImmediate(struct gl_context * ctx, int start, int end, int prim) { context_t *context = R700_CONTEXT(ctx); BATCH_LOCALS(&context->radeon); - int type, i; + int type; uint32_t num_indices, total_emit = 0; uint32_t vgt_draw_initiator = 0; uint32_t vgt_index_type = 0; @@ -346,29 +358,24 @@ static void r700RunRenderPrimitiveImmediate(GLcontext * ctx, int start, int end, SETfield(vgt_index_type, DI_INDEX_SIZE_16_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask); } + /* 16-bit indexes are packed in a 32-bit value */ + SETfield(vgt_index_type, +#if MESA_BIG_ENDIAN + VGT_DMA_SWAP_32_BIT, +#else + VGT_DMA_SWAP_NONE, +#endif + SWAP_MODE_shift, SWAP_MODE_mask); + vgt_num_indices = num_indices; SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask); - if (start == 0) - { - SETfield(vgt_draw_initiator, DI_SRC_SEL_AUTO_INDEX, SOURCE_SELECT_shift, SOURCE_SELECT_mask); - } - else - { - if (num_indices > 0xffff) - { - total_emit += num_indices; - } - else - { - total_emit += (num_indices + 1) / 2; - } - SETfield(vgt_draw_initiator, DI_SRC_SEL_IMMEDIATE, SOURCE_SELECT_shift, SOURCE_SELECT_mask); - } + SETfield(vgt_draw_initiator, DI_SRC_SEL_AUTO_INDEX, SOURCE_SELECT_shift, SOURCE_SELECT_mask); total_emit += 3 /* VGT_PRIMITIVE_TYPE */ + 2 /* VGT_INDEX_TYPE */ + 2 /* NUM_INSTANCES */ + + 4 /* VTX_BASE_VTX_LOC + VTX_START_INST_LOC */ + 3; /* DRAW */ BEGIN_BATCH_NO_AUTOSTATE(total_emit); @@ -381,52 +388,25 @@ static void r700RunRenderPrimitiveImmediate(GLcontext * ctx, int start, int end, // num instances R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0)); R600_OUT_BATCH(1); + /* offset */ + R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 2)); + R600_OUT_BATCH(mmSQ_VTX_BASE_VTX_LOC - ASIC_CTL_CONST_BASE_INDEX); + R600_OUT_BATCH(start); //VTX_BASE_VTX_LOC + R600_OUT_BATCH(0); //VTX_START_INST_LOC // draw packet - if(start == 0) - { - R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1)); - R600_OUT_BATCH(vgt_num_indices); - R600_OUT_BATCH(vgt_draw_initiator); - } - else - { - if (num_indices > 0xffff) - { - R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (num_indices + 1))); - R600_OUT_BATCH(vgt_num_indices); - R600_OUT_BATCH(vgt_draw_initiator); - for (i = start; i < (start + num_indices); i++) - { - R600_OUT_BATCH(i); - } - } - else - { - R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (((num_indices + 1) / 2) + 1))); - R600_OUT_BATCH(vgt_num_indices); - R600_OUT_BATCH(vgt_draw_initiator); - for (i = start; i < (start + num_indices); i += 2) - { - if ((i + 1) == (start + num_indices)) - { - R600_OUT_BATCH(i); - } - else - { - R600_OUT_BATCH(((i + 1) << 16) | (i)); - } - } - } - } + + R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1)); + R600_OUT_BATCH(vgt_num_indices); + R600_OUT_BATCH(vgt_draw_initiator); END_BATCH(); COMMIT_BATCH(); } /* start 3d, idle, cb/db flush */ -#define PRE_EMIT_STATE_BUFSZ 10 + 5 + 14 +#define PRE_EMIT_STATE_BUFSZ 5 + 5 + 14 -static GLuint r700PredictRenderSize(GLcontext* ctx, +static GLuint r700PredictRenderSize(struct gl_context* ctx, const struct _mesa_prim *prim, const struct _mesa_index_buffer *ib, GLuint nr_prims) @@ -438,16 +418,11 @@ static GLuint r700PredictRenderSize(GLcontext* ctx, dwords = PRE_EMIT_STATE_BUFSZ; if (ib) - dwords += nr_prims * 14; + dwords += nr_prims * 18; else { for (i = 0; i < nr_prims; ++i) { - if (prim[i].start == 0) - dwords += 10; - else if (prim[i].count > 0xffff) - dwords += prim[i].count + 10; - else - dwords += ((prim[i].count + 1) / 2) + 10; + dwords += 14; } } @@ -493,7 +468,7 @@ static GLuint r700PredictRenderSize(GLcontext* ctx, * Convert attribute data type to float * If the attribute uses named buffer object replace the bo with newly allocated bo */ -static void r700ConvertAttrib(GLcontext *ctx, int count, +static void r700ConvertAttrib(struct gl_context *ctx, int count, const struct gl_client_array *input, struct StreamDesc *attr) { @@ -528,6 +503,9 @@ static void r700ConvertAttrib(GLcontext *ctx, int count, radeonAllocDmaRegion(&context->radeon, &attr->bo, &attr->bo_offset, sizeof(GLfloat) * input->Size * count, 32); + + radeon_bo_map(attr->bo, 1); + dst_ptr = (GLfloat *)ADD_POINTERS(attr->bo->ptr, attr->bo_offset); assert(src_ptr != NULL); @@ -561,13 +539,16 @@ static void r700ConvertAttrib(GLcontext *ctx, int count, break; } + radeon_bo_unmap(attr->bo); + if (mapped_named_bo) { ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj); } } -static void r700AlignDataToDword(GLcontext *ctx, +#if 0 /* unused */ +static void r700AlignDataToDword(struct gl_context *ctx, const struct gl_client_array *input, int count, struct StreamDesc *attr) @@ -579,6 +560,8 @@ static void r700AlignDataToDword(GLcontext *ctx, radeonAllocDmaRegion(&context->radeon, &attr->bo, &attr->bo_offset, size, 32); + radeon_bo_map(attr->bo, 1); + if (!input->BufferObj->Pointer) { ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj); @@ -592,12 +575,13 @@ static void r700AlignDataToDword(GLcontext *ctx, for (i = 0; i < count; ++i) { - _mesa_memcpy(dst_ptr, src_ptr, input->StrideB); + memcpy(dst_ptr, src_ptr, input->StrideB); src_ptr += input->StrideB; dst_ptr += dst_stride; } } + radeon_bo_unmap(attr->bo); if (mapped_named_bo) { ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj); @@ -605,8 +589,9 @@ static void r700AlignDataToDword(GLcontext *ctx, attr->stride = dst_stride; } +#endif -static void r700SetupStreams(GLcontext *ctx, const struct gl_client_array *input[], int count) +static void r700SetupStreams(struct gl_context *ctx, const struct gl_client_array *input[], int count) { context_t *context = R700_CONTEXT(ctx); GLuint stride; @@ -622,31 +607,23 @@ static void r700SetupStreams(GLcontext *ctx, const struct gl_client_array *input stride = (input[i]->StrideB == 0) ? getTypeSize(input[i]->Type) * input[i]->Size : input[i]->StrideB; - if (input[i]->Type == GL_DOUBLE || input[i]->Type == GL_UNSIGNED_INT || input[i]->Type == GL_INT || + if (input[i]->Type == GL_DOUBLE || input[i]->Type == GL_UNSIGNED_INT || input[i]->Type == GL_INT #if MESA_BIG_ENDIAN - getTypeSize(input[i]->Type) != 4 || + || getTypeSize(input[i]->Type) != 4 #endif - stride < 4) + ) { + assert(count); r700ConvertAttrib(ctx, count, input[i], &context->stream_desc[index]); } else { if (input[i]->BufferObj->Name) { - if (stride % 4 != 0) - { - assert(((intptr_t) input[i]->Ptr) % input[i]->StrideB == 0); - r700AlignDataToDword(ctx, input[i], count, &context->stream_desc[index]); - context->stream_desc[index].is_named_bo = GL_FALSE; - } - else - { - context->stream_desc[index].stride = input[i]->StrideB; - context->stream_desc[index].bo_offset = (intptr_t) input[i]->Ptr; - context->stream_desc[index].bo = get_radeon_buffer_object(input[i]->BufferObj)->bo; - context->stream_desc[index].is_named_bo = GL_TRUE; - } + context->stream_desc[index].stride = input[i]->StrideB; + context->stream_desc[index].bo_offset = (intptr_t) input[i]->Ptr; + context->stream_desc[index].bo = get_radeon_buffer_object(input[i]->BufferObj)->bo; + context->stream_desc[index].is_named_bo = GL_TRUE; } else { @@ -666,14 +643,18 @@ static void r700SetupStreams(GLcontext *ctx, const struct gl_client_array *input radeonAllocDmaRegion(&context->radeon, &context->stream_desc[index].bo, &context->stream_desc[index].bo_offset, size, 32); + + radeon_bo_map(context->stream_desc[index].bo, 1); assert(context->stream_desc[index].bo->ptr != NULL); + + dst = (uint32_t *)ADD_POINTERS(context->stream_desc[index].bo->ptr, context->stream_desc[index].bo_offset); switch (context->stream_desc[index].dwords) { case 1: - radeonEmitVec4(dst, input[i]->Ptr, input[i]->StrideB, local_count); + radeonEmitVec4(dst, input[i]->Ptr, input[i]->StrideB, local_count); break; case 2: radeonEmitVec8(dst, input[i]->Ptr, input[i]->StrideB, local_count); @@ -688,6 +669,7 @@ static void r700SetupStreams(GLcontext *ctx, const struct gl_client_array *input assert(0); break; } + radeon_bo_unmap(context->stream_desc[index].bo); } } @@ -710,7 +692,7 @@ static void r700SetupStreams(GLcontext *ctx, const struct gl_client_array *input RADEON_GEM_DOMAIN_GTT, 0); } -static void r700FreeData(GLcontext *ctx) +static void r700FreeData(struct gl_context *ctx) { /* Need to zero tcl.aos[n].bo and tcl.elt_dma_bo * to prevent double unref in radeonReleaseArrays @@ -735,7 +717,7 @@ static void r700FreeData(GLcontext *ctx) } } -static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf) +static void r700FixupIndexBuffer(struct gl_context *ctx, const struct _mesa_index_buffer *mesa_ind_buf) { context_t *context = R700_CONTEXT(ctx); GLvoid *src_ptr; @@ -759,6 +741,7 @@ static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo, &context->ind_buf.bo_offset, size, 4); + radeon_bo_map(context->ind_buf.bo, 1); assert(context->ind_buf.bo->ptr != NULL); out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset); @@ -772,6 +755,7 @@ static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *out++ = in[i]; } + radeon_bo_unmap(context->ind_buf.bo); #if MESA_BIG_ENDIAN } else @@ -782,6 +766,7 @@ static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo, &context->ind_buf.bo_offset, size, 4); + radeon_bo_map(context->ind_buf.bo, 1); assert(context->ind_buf.bo->ptr != NULL); out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset); @@ -794,6 +779,7 @@ static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer { *out++ = in[i]; } + radeon_bo_unmap(context->ind_buf.bo); #endif } @@ -806,7 +792,7 @@ static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer } } -static void r700SetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf) +static void r700SetupIndexBuffer(struct gl_context *ctx, const struct _mesa_index_buffer *mesa_ind_buf) { context_t *context = R700_CONTEXT(ctx); @@ -817,11 +803,10 @@ static void r700SetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer #if MESA_BIG_ENDIAN if (mesa_ind_buf->type == GL_UNSIGNED_INT) - { #else if (mesa_ind_buf->type != GL_UNSIGNED_BYTE) - { #endif + { const GLvoid *src_ptr; GLvoid *dst_ptr; GLboolean mapped_named_bo = GL_FALSE; @@ -839,11 +824,13 @@ static void r700SetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo, &context->ind_buf.bo_offset, size, 4); + radeon_bo_map(context->ind_buf.bo, 1); assert(context->ind_buf.bo->ptr != NULL); dst_ptr = ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset); - _mesa_memcpy(dst_ptr, src_ptr, size); + memcpy(dst_ptr, src_ptr, size); + radeon_bo_unmap(context->ind_buf.bo); context->ind_buf.is_32bit = (mesa_ind_buf->type == GL_UNSIGNED_INT); context->ind_buf.count = mesa_ind_buf->count; @@ -858,7 +845,15 @@ static void r700SetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer } } -static GLboolean r700TryDrawPrims(GLcontext *ctx, +static GLboolean check_fallbacks(struct gl_context *ctx) +{ + if (ctx->RenderMode != GL_RENDER) + return GL_TRUE; + + return GL_FALSE; +} + +static GLboolean r700TryDrawPrims(struct gl_context *ctx, const struct gl_client_array *arrays[], const struct _mesa_prim *prim, GLuint nr_prims, @@ -874,6 +869,9 @@ static GLboolean r700TryDrawPrims(GLcontext *ctx, if (ctx->NewState) _mesa_update_state( ctx ); + if (check_fallbacks(ctx)) + return GL_FALSE; + _tnl_UpdateFixedFunctionProgram(ctx); r700SetVertexFormat(ctx, arrays, max_index + 1); /* shaders need to be updated before buffers are validated */ @@ -891,7 +889,7 @@ static GLboolean r700TryDrawPrims(GLcontext *ctx, r700SetScissor(context); r700SetupVertexProgram(ctx); r700SetupFragmentProgram(ctx); - r600UpdateTextureState(ctx); + r700UpdateShaderStates(ctx); GLuint emit_end = r700PredictRenderSize(ctx, prim, ib, nr_prims) + context->radeon.cmdbuf.cs->cdw; @@ -908,7 +906,8 @@ static GLboolean r700TryDrawPrims(GLcontext *ctx, r700RunRenderPrimitive(ctx, prim[i].start, prim[i].start + prim[i].count, - prim[i].mode); + prim[i].mode, + prim[i].basevertex); else r700RunRenderPrimitiveImmediate(ctx, prim[i].start, @@ -918,6 +917,7 @@ static GLboolean r700TryDrawPrims(GLcontext *ctx, radeon_debug_remove_indent(); /* Flush render op cached for last several quads. */ + /* XXX drm should handle this in fence submit */ r700WaitForIdleClean(context); rrb = radeon_get_colorbuffer(&context->radeon); @@ -941,7 +941,7 @@ static GLboolean r700TryDrawPrims(GLcontext *ctx, return GL_TRUE; } -static void r700DrawPrims(GLcontext *ctx, +static void r700DrawPrims(struct gl_context *ctx, const struct gl_client_array *arrays[], const struct _mesa_prim *prim, GLuint nr_prims, @@ -952,27 +952,35 @@ static void r700DrawPrims(GLcontext *ctx, { GLboolean retval = GL_FALSE; + context_t *context = R700_CONTEXT(ctx); + radeonContextPtr radeon = &context->radeon; + radeon_prepare_render(radeon); + /* This check should get folded into just the places that * min/max index are really needed. */ - if (!index_bounds_valid) { - vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index); - } - if (min_index) { + if (!vbo_all_varyings_in_vbos(arrays)) { + if (!index_bounds_valid) + vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index); + /* do we want to rebase, minimizes the + * amount of data to upload? */ + if (min_index) { vbo_rebase_prims( ctx, arrays, prim, nr_prims, ib, min_index, max_index, r700DrawPrims ); return; + } } - /* Make an attempt at drawing */ retval = r700TryDrawPrims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); /* If failed run tnl pipeline - it should take care of fallbacks */ - if (!retval) + if (!retval) { + _swsetup_Wakeup(ctx); _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); + } } -void r700InitDraw(GLcontext *ctx) +void r700InitDraw(struct gl_context *ctx) { struct vbo_context *vbo = vbo_context(ctx);