X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fr600%2Fr700_render.c;h=b1c3648ca568867685e29afac3fb875d97d4a4a8;hb=b86302283b48654682e0580c53ece01bf095fa95;hp=78b92d695e47080f0c483cb18512ce361c23aa5b;hpb=9bcc421e68e041f44a554f710788c5042169cd1a;p=mesa.git diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c index 78b92d695e4..b1c3648ca56 100644 --- a/src/mesa/drivers/dri/r600/r700_render.c +++ b/src/mesa/drivers/dri/r600/r700_render.c @@ -47,16 +47,29 @@ #include "r600_context.h" #include "r600_cmdbuf.h" -#include "r700_chip.h" -#include "r700_tex.h" +#include "r600_tex.h" #include "r700_vertprog.h" #include "r700_fragprog.h" #include "r700_state.h" +#include "radeon_common_context.h" + +void r700WaitForIdle(context_t *context); +void r700WaitForIdleClean(context_t *context); +GLboolean r700SendTextureState(context_t *context); +static unsigned int r700PrimitiveType(int prim); +void r600UpdateTextureState(GLcontext * ctx); +GLboolean r700SyncSurf(context_t *context, + struct radeon_bo *pbo, + uint32_t read_domain, + uint32_t write_domain, + uint32_t sync_type); + void r700WaitForIdle(context_t *context) { BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_RENDER | RADEON_STATE, RADEON_TRACE, "%s\n", __func__); BEGIN_BATCH_NO_AUTOSTATE(3); R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1)); @@ -70,6 +83,7 @@ void r700WaitForIdle(context_t *context) void r700WaitForIdleClean(context_t *context) { BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_RENDER | RADEON_STATE, RADEON_TRACE, "%s\n", __func__); BEGIN_BATCH_NO_AUTOSTATE(5); R600_OUT_BATCH(CP_PACKET3(R600_IT_EVENT_WRITE, 0)); @@ -86,6 +100,7 @@ void r700WaitForIdleClean(context_t *context) void r700Start3D(context_t *context) { BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_RENDER | RADEON_STATE, RADEON_TRACE, "%s\n", __func__); if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770) { BEGIN_BATCH_NO_AUTOSTATE(2); @@ -105,97 +120,43 @@ void r700Start3D(context_t *context) r700WaitForIdleClean(context); } -static GLboolean r700SetupShaders(GLcontext * ctx) -{ - context_t *context = R700_CONTEXT(ctx); - - R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(context->chipobj.pvChipObj); - - GLuint exportCount; - - r700->SQ_PGM_RESOURCES_PS.u32All = 0; - r700->SQ_PGM_RESOURCES_VS.u32All = 0; - - SETbit(r700->SQ_PGM_RESOURCES_PS.u32All, PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit); - SETbit(r700->SQ_PGM_RESOURCES_VS.u32All, PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit); - - r700SetupVertexProgram(ctx); - - r700SetupFragmentProgram(ctx); - - exportCount = (r700->SQ_PGM_EXPORTS_PS.u32All & EXPORT_MODE_mask) / (1 << EXPORT_MODE_shift); - r700->CB_SHADER_CONTROL.u32All = (1 << exportCount) - 1; - - return GL_TRUE; -} - -GLboolean r700SendTextureState(context_t *context) -{ - unsigned int i; - - R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(context->chipobj.pvChipObj); -#if 0 /* to be enabled */ - for(i=0; itexture_states.textures[i] != 0) - { - R700_CMDBUF_CHECK_SPACE(9); - R700EP3 (context, IT_SET_RESOURCE, 7); - R700E32 (context, i * 7); - R700E32 (context, r700->texture_states.textures[i]->SQ_TEX_RESOURCE0.u32All); - R700E32 (context, r700->texture_states.textures[i]->SQ_TEX_RESOURCE1.u32All); - R700E32 (context, r700->texture_states.textures[i]->SQ_TEX_RESOURCE2.u32All); - R700E32 (context, r700->texture_states.textures[i]->SQ_TEX_RESOURCE3.u32All); - R700E32 (context, r700->texture_states.textures[i]->SQ_TEX_RESOURCE4.u32All); - R700E32 (context, r700->texture_states.textures[i]->SQ_TEX_RESOURCE5.u32All); - R700E32 (context, r700->texture_states.textures[i]->SQ_TEX_RESOURCE6.u32All); - } - - if(r700->texture_states.samplers[i] != 0) - { - R700_CMDBUF_CHECK_SPACE(5); - R700EP3 (context, IT_SET_SAMPLER, 3); - R700E32 (context, i * 3); // Base at 0x7000 - R700E32 (context, r700->texture_states.samplers[i]->SQ_TEX_SAMPLER0.u32All); - R700E32 (context, r700->texture_states.samplers[i]->SQ_TEX_SAMPLER1.u32All); - R700E32 (context, r700->texture_states.samplers[i]->SQ_TEX_SAMPLER2.u32All); - } - } -#endif - return GL_TRUE; -} - -GLboolean r700SyncSurf(context_t *context) +GLboolean r700SyncSurf(context_t *context, + struct radeon_bo *pbo, + uint32_t read_domain, + uint32_t write_domain, + uint32_t sync_type) { BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_RENDER | RADEON_STATE, RADEON_TRACE, "%s\n", __func__); + uint32_t cp_coher_size; - /* TODO : too heavy? */ - unsigned int CP_COHER_CNTL = 0; + if (!pbo) + return GL_FALSE; - CP_COHER_CNTL |= (TC_ACTION_ENA_bit - | VC_ACTION_ENA_bit - | CB_ACTION_ENA_bit - | DB_ACTION_ENA_bit - | SH_ACTION_ENA_bit - | SMX_ACTION_ENA_bit); + if (pbo->size == 0xffffffff) + cp_coher_size = 0xffffffff; + else + cp_coher_size = ((pbo->size + 255) >> 8); - - BEGIN_BATCH_NO_AUTOSTATE(5); + BEGIN_BATCH_NO_AUTOSTATE(5 + 2); R600_OUT_BATCH(CP_PACKET3(R600_IT_SURFACE_SYNC, 3)); - R600_OUT_BATCH(CP_COHER_CNTL); - R600_OUT_BATCH(0xFFFFFFFF); - R600_OUT_BATCH(0x00000000); + R600_OUT_BATCH(sync_type); + R600_OUT_BATCH(cp_coher_size); + R600_OUT_BATCH(0); R600_OUT_BATCH(10); - + R600_OUT_BATCH_RELOC(0, + pbo, + 0, + read_domain, write_domain, 0); END_BATCH(); COMMIT_BATCH(); return GL_TRUE; } -unsigned int r700PrimitiveType(int prim) +static unsigned int r700PrimitiveType(int prim) { - switch (prim & PRIM_MODE_MASK) + switch (prim & PRIM_MODE_MASK) { case GL_POINTS: return DI_PT_POINTLIST; @@ -234,135 +195,217 @@ unsigned int r700PrimitiveType(int prim) } } -static GLboolean r700RunRender(GLcontext * ctx, - struct tnl_pipeline_stage *stage) +static int r700NumVerts(int num_verts, int prim) { - context_t *context = R700_CONTEXT(ctx); - R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(context->chipobj.pvChipObj); -#if 1 - BATCH_LOCALS(&context->radeon); - - unsigned int i, j; - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *vb = &tnl->vb; - - struct r700_fragment_program *fp = (struct r700_fragment_program *) - (ctx->FragmentProgram._Current); - if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770) - { - fp->r700AsmCode.bR6xx = 1; - } - - r700Start3D(context); /* TODO : this is too much. */ - - r700SyncSurf(context); /* TODO : make it light. */ + int verts_off = 0; + + switch (prim & PRIM_MODE_MASK) { + case GL_POINTS: + verts_off = 0; + break; + case GL_LINES: + verts_off = num_verts % 2; + break; + case GL_LINE_STRIP: + if (num_verts < 2) + verts_off = num_verts; + break; + case GL_LINE_LOOP: + if (num_verts < 2) + verts_off = num_verts; + break; + case GL_TRIANGLES: + verts_off = num_verts % 3; + break; + case GL_TRIANGLE_STRIP: + if (num_verts < 3) + verts_off = num_verts; + break; + case GL_TRIANGLE_FAN: + if (num_verts < 3) + verts_off = num_verts; + break; + case GL_QUADS: + verts_off = num_verts % 4; + break; + case GL_QUAD_STRIP: + if (num_verts < 4) + verts_off = num_verts; + else + verts_off = num_verts % 2; + break; + case GL_POLYGON: + if (num_verts < 3) + verts_off = num_verts; + break; + default: + assert(0); + return -1; + break; + } + + return num_verts - verts_off; +} - r700UpdateShaders(ctx); +static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim) +{ + context_t *context = R700_CONTEXT(ctx); + BATCH_LOCALS(&context->radeon); + int type, i, total_emit; + int num_indices; + uint32_t vgt_draw_initiator = 0; + uint32_t vgt_index_type = 0; + uint32_t vgt_primitive_type = 0; + uint32_t vgt_num_indices = 0; + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *vb = &tnl->vb; + + type = r700PrimitiveType(prim); + num_indices = r700NumVerts(end - start, prim); + + radeon_print(RADEON_RENDER, RADEON_TRACE, + "%s type %x num_indices %d\n", + __func__, type, num_indices); + + if (type < 0 || num_indices <= 0) + return; + + total_emit = 3 /* VGT_PRIMITIVE_TYPE */ + + 2 /* VGT_INDEX_TYPE */ + + 2 /* NUM_INSTANCES */ + + num_indices + 3; /* DRAW_INDEX_IMMD */ + + BEGIN_BATCH_NO_AUTOSTATE(total_emit); + // prim + SETfield(vgt_primitive_type, type, + VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask); + R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1)); + R600_OUT_BATCH(mmVGT_PRIMITIVE_TYPE - ASIC_CONFIG_BASE_INDEX); + R600_OUT_BATCH(vgt_primitive_type); - r700SetRenderTarget(context); + // index type + SETfield(vgt_index_type, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask); + R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0)); + R600_OUT_BATCH(vgt_index_type); + + // num instances + R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0)); + R600_OUT_BATCH(1); + + // draw packet + vgt_num_indices = num_indices; + SETfield(vgt_draw_initiator, DI_SRC_SEL_IMMEDIATE, SOURCE_SELECT_shift, SOURCE_SELECT_mask); + SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask); + + R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (num_indices + 1))); + R600_OUT_BATCH(vgt_num_indices); + R600_OUT_BATCH(vgt_draw_initiator); + + for (i = start; i < (start + num_indices); i++) { + if(vb->Elts) + R600_OUT_BATCH(vb->Elts[i]); + else + R600_OUT_BATCH(i); + } + END_BATCH(); + COMMIT_BATCH(); - if(r700SetupStreams(ctx)) - { - return GL_TRUE; - } +} - /* flush TX */ - //r700SyncSurf(context); /* */ +/* start 3d, idle, cb/db flush */ +#define PRE_EMIT_STATE_BUFSZ 10 + 5 + 14 - r700UpdateTextureState(context); - r700SendTextureState(context); +static GLuint r700PredictRenderSize(GLcontext* ctx) +{ + context_t *context = R700_CONTEXT(ctx); + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct r700_vertex_program *vp = context->selected_vp; + struct vertex_buffer *vb = &tnl->vb; + GLboolean flushed; + GLuint dwords, i; + GLuint state_size; + /* pre calculate aos count so state prediction works */ + context->radeon.tcl.aos_count = _mesa_bitcount(vp->mesa_program->Base.InputsRead); + + dwords = PRE_EMIT_STATE_BUFSZ; + for (i = 0; i < vb->PrimitiveCount; i++) + dwords += vb->Primitive[i].count + 10; + state_size = radeonCountStateEmitSize(&context->radeon); + flushed = rcommonEnsureCmdBufSpace(&context->radeon, + dwords + state_size, __FUNCTION__); + + if (flushed) + dwords += radeonCountStateEmitSize(&context->radeon); + else + dwords += state_size; + + radeon_print(RADEON_RENDER, RADEON_VERBOSE, + "%s: total prediction size is %d.\n", __FUNCTION__, dwords); + return dwords; +} - if(GL_FALSE == fp->translated) - { - if( GL_FALSE == r700TranslateFragmentShader(fp, &(fp->mesa_program)) ) - { - return GL_TRUE; - } - } +static GLboolean r700RunRender(GLcontext * ctx, + struct tnl_pipeline_stage *stage) +{ + context_t *context = R700_CONTEXT(ctx); + radeonContextPtr radeon = &context->radeon; + unsigned int i, id = 0; + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *vb = &tnl->vb; + struct radeon_renderbuffer *rrb; - /* flush SQ */ - //r700SyncSurf(context); /* */ - //r700SyncSurf(context); /* */ + radeon_print(RADEON_RENDER, RADEON_NORMAL, "%s: cs begin at %d\n", + __func__, context->radeon.cmdbuf.cs->cdw); - r700SetupShaders(ctx); + /* always emit CB base to prevent + * lock ups on some chips. + */ + R600_STATECHANGE(context, cb_target); + /* mark vtx as dirty since it changes per-draw */ + R600_STATECHANGE(context, vtx); - /* set a valid base address to make the command checker happy */ - r700->SQ_PGM_START_FS.u32All = r700->SQ_PGM_START_PS.u32All; - r700->SQ_PGM_START_ES.u32All = r700->SQ_PGM_START_PS.u32All; - r700->SQ_PGM_START_GS.u32All = r700->SQ_PGM_START_PS.u32All; + r700SetScissor(context); + r700SetupVertexProgram(ctx); + r700SetupFragmentProgram(ctx); + r600UpdateTextureState(ctx); - /* flush vtx */ - //r700SyncSurf(context); /* */ + GLuint emit_end = r700PredictRenderSize(ctx) + + context->radeon.cmdbuf.cs->cdw; + r700SetupStreams(ctx); - r700SendContextStates(context, GL_FALSE); + radeonEmitState(radeon); + radeon_debug_add_indent(); /* richard test code */ - for (i = 0; i < vb->PrimitiveCount; i++) - { + for (i = 0; i < vb->PrimitiveCount; i++) { GLuint prim = _tnl_translate_prim(&vb->Primitive[i]); GLuint start = vb->Primitive[i].start; GLuint end = vb->Primitive[i].start + vb->Primitive[i].count; - GLuint numIndices = vb->Primitive[i].count; - GLuint numEntires; - //r300RunRenderPrimitive(rmesa, ctx, start, end, prim); - - unsigned int VGT_DRAW_INITIATOR = 0; - unsigned int VGT_INDEX_TYPE = 0; - unsigned int VGT_PRIMITIVE_TYPE = 0; - unsigned int VGT_NUM_INDICES = 0; - - numEntires = 2 /* VGT_INDEX_TYPE */ - + 3 /* VGT_PRIMITIVE_TYPE */ - + numIndices + 3; /* DRAW_INDEX_IMMD */ - - BEGIN_BATCH_NO_AUTOSTATE(numEntires); - - VGT_INDEX_TYPE |= DI_INDEX_SIZE_32_BIT << INDEX_TYPE_shift; - - R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0)); - R600_OUT_BATCH(VGT_INDEX_TYPE); - - VGT_NUM_INDICES = numIndices; - - VGT_PRIMITIVE_TYPE |= r700PrimitiveType(prim) << VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift; - R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1)); - R600_OUT_BATCH(mmVGT_PRIMITIVE_TYPE - ASIC_CONFIG_BASE_INDEX); - R600_OUT_BATCH(VGT_PRIMITIVE_TYPE); - - VGT_DRAW_INITIATOR |= DI_SRC_SEL_IMMEDIATE << SOURCE_SELECT_shift; - VGT_DRAW_INITIATOR |= DI_MAJOR_MODE_0 << MAJOR_MODE_shift; - - R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (numIndices + 1))); - R600_OUT_BATCH(VGT_NUM_INDICES); - R600_OUT_BATCH(VGT_DRAW_INITIATOR); - - for (j=0; jradeon); + if (rrb && rrb->bo) + r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM, + CB_ACTION_ENA_bit | (1 << (id + 6))); - radeonReleaseArrays(ctx, 0); + rrb = radeon_get_depthbuffer(&context->radeon); + if (rrb && rrb->bo) + r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM, + DB_ACTION_ENA_bit | DB_DEST_BASE_ENA_bit); - //richard test - /* test stamp, write a number to mmSCRATCH4 */ -#if 0 - BEGIN_BATCH_NO_AUTOSTATE(3); - R600_OUT_BATCH_REGVAL((0x2144 << 2), 0x56785678); - END_BATCH(); - COMMIT_BATCH(); -#endif + radeonReleaseArrays(ctx, ~0); + + radeon_print(RADEON_RENDER, RADEON_TRACE, "%s: cs end at %d\n", + __func__, context->radeon.cmdbuf.cs->cdw); -#endif //0 - rcommonFlushCmdBuf( &context->radeon, __FUNCTION__ ); + if ( emit_end < context->radeon.cmdbuf.cs->cdw ) + WARN_ONCE("Rendering was %d commands larger than predicted size." + " We might overflow command buffer.\n", context->radeon.cmdbuf.cs->cdw - emit_end); return GL_FALSE; } @@ -382,18 +425,17 @@ static GLboolean r700RunTCLRender(GLcontext * ctx, /*----------------------*/ /* TODO : sw fallback */ + /* Need shader bo's setup before bo check */ + r700UpdateShaders(ctx); /** + * Ensure all enabled and complete textures are uploaded along with any buffers being used. */ - if(!r700ValidateBuffers(ctx)) + if(!r600ValidateBuffers(ctx)) { return GL_TRUE; } - context_t *context = R700_CONTEXT(ctx); - - r700UpdateShaders(ctx); - bRet = r700RunRender(ctx, stage); return bRet;