X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fr600%2Fr700_chip.c;h=2b2b4d748f6c7e74ebf4d49f1316f89ca0f64b22;hb=cefee4e327c92daa2f01b6de650a43eddd348063;hp=3f11cf2c9819cf8111fcb0e32c5ca9367648a24d;hpb=670bd47df9f76b8e78c266b71d26a2a3968080ec;p=mesa.git

diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c
index 3f11cf2c981..2b2b4d748f6 100644
--- a/src/mesa/drivers/dri/r600/r700_chip.c
+++ b/src/mesa/drivers/dri/r600/r700_chip.c
@@ -49,39 +49,47 @@ static void r700SendTexState(GLcontext *ctx, struct radeon_state_atom *atom)
     unsigned int i;
     BATCH_LOCALS(&context->radeon);
 
+    radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
     for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) {
-        radeonTexObj *t = r700->textures[i];
-        if (t) {
-            if (!t->image_override)
-                bo = t->mt->bo;
-            else
-                bo = t->bo;
-            if (bo) {
-
-                r700SyncSurf(context, bo,
-                             RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM,
-                             0, TC_ACTION_ENA_bit);
-
-                BEGIN_BATCH_NO_AUTOSTATE(9 + 4);
-                R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
-                R600_OUT_BATCH(i * 7);
-                R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE0);
-                R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE1);
-                R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE2);
-                R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE3);
-                R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE4);
-                R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE5);
-                R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE6);
-                R600_OUT_BATCH_RELOC(r700->textures[i]->SQ_TEX_RESOURCE2,
-                                     bo,
-                                     0,
-                                     RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
-                R600_OUT_BATCH_RELOC(r700->textures[i]->SQ_TEX_RESOURCE3,
-                                     bo,
-                                     r700->textures[i]->SQ_TEX_RESOURCE3,
-                                     RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
-                END_BATCH();
-                COMMIT_BATCH();
+        if (ctx->Texture.Unit[i]._ReallyEnabled) {
+            radeonTexObj *t = r700->textures[i];
+            uint32_t offset;
+            if (t) {
+                if (!t->image_override) {
+                    bo = t->mt->bo;
+                    offset = get_base_teximage_offset(t);
+                } else {
+                    bo = t->bo;
+                    offset = 0;
+                }
+                if (bo) {
+
+                    r700SyncSurf(context, bo,
+                                 RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM,
+                                 0, TC_ACTION_ENA_bit);
+
+                    BEGIN_BATCH_NO_AUTOSTATE(9 + 4);
+                    R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
+                    R600_OUT_BATCH(i * 7);
+                    R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE0);
+                    R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE1);
+                    R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE2);
+                    R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE3);
+                    R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE4);
+                    R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE5);
+                    R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE6);
+                    R600_OUT_BATCH_RELOC(r700->textures[i]->SQ_TEX_RESOURCE2,
+                                         bo,
+                                         offset,
+                                         RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+                    R600_OUT_BATCH_RELOC(r700->textures[i]->SQ_TEX_RESOURCE3,
+                                         bo,
+                                         r700->textures[i]->SQ_TEX_RESOURCE3,
+                                         RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+                    END_BATCH();
+                    COMMIT_BATCH();
+                }
             }
         }
     }
@@ -93,18 +101,21 @@ static void r700SendTexSamplerState(GLcontext *ctx, struct radeon_state_atom *at
     R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
     unsigned int i;
     BATCH_LOCALS(&context->radeon);
+    radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
 
     for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) {
-        radeonTexObj *t = r700->textures[i];
-        if (t) {
-            BEGIN_BATCH_NO_AUTOSTATE(5);
-            R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_SAMPLER, 3));
-            R600_OUT_BATCH(i * 3);
-            R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER0);
-            R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER1);
-            R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER2);
-            END_BATCH();
-            COMMIT_BATCH();
+        if (ctx->Texture.Unit[i]._ReallyEnabled) {
+            radeonTexObj *t = r700->textures[i];
+            if (t) {
+                BEGIN_BATCH_NO_AUTOSTATE(5);
+                R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_SAMPLER, 3));
+                R600_OUT_BATCH(i * 3);
+                R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER0);
+                R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER1);
+                R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER2);
+                END_BATCH();
+                COMMIT_BATCH();
+            }
         }
     }
 }
@@ -115,31 +126,33 @@ static void r700SendTexBorderColorState(GLcontext *ctx, struct radeon_state_atom
     R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
     unsigned int i;
     BATCH_LOCALS(&context->radeon);
+    radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
 
     for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) {
-        radeonTexObj *t = r700->textures[i];
-        if (t) {
-            BEGIN_BATCH_NO_AUTOSTATE(2 + 4);
-            R600_OUT_BATCH_REGSEQ((TD_PS_SAMPLER0_BORDER_RED + (i * 16)), 4);
-            R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_RED);
-            R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_GREEN);
-            R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_BLUE);
-            R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_ALPHA);
-            END_BATCH();
-            COMMIT_BATCH();
+        if (ctx->Texture.Unit[i]._ReallyEnabled) {
+            radeonTexObj *t = r700->textures[i];
+            if (t) {
+                BEGIN_BATCH_NO_AUTOSTATE(2 + 4);
+                R600_OUT_BATCH_REGSEQ((TD_PS_SAMPLER0_BORDER_RED + (i * 16)), 4);
+                R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_RED);
+                R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_GREEN);
+                R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_BLUE);
+                R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_ALPHA);
+                END_BATCH();
+                COMMIT_BATCH();
+            }
         }
     }
 }
 
+extern int getTypeSize(GLenum type);
 static void r700SetupVTXConstants(GLcontext * ctx,
-                                  unsigned int nStreamID,
                                   void * pAos,
-                                  unsigned int size,   /* number of elements in vector */
-                                  unsigned int stride,
-                                  unsigned int count)  /* number of vectors in stream */
+                                  StreamDesc * pStreamDesc)
 {
     context_t *context = R700_CONTEXT(ctx);
     struct radeon_aos * paos = (struct radeon_aos *)pAos;
+    unsigned int nVBsize;
     BATCH_LOCALS(&context->radeon);
 
     unsigned int uSQ_VTX_CONSTANT_WORD0_0;
@@ -160,18 +173,40 @@ static void r700SetupVTXConstants(GLcontext * ctx,
     else
         r700SyncSurf(context, paos->bo, RADEON_GEM_DOMAIN_GTT, 0, VC_ACTION_ENA_bit);
 
+    if(0 == pStreamDesc->stride)
+    {
+        nVBsize = paos->count * pStreamDesc->size * getTypeSize(pStreamDesc->type);
+    }
+    else
+    {
+        nVBsize = paos->count * pStreamDesc->stride;
+    }
+
     uSQ_VTX_CONSTANT_WORD0_0 = paos->offset;
-    uSQ_VTX_CONSTANT_WORD1_0 = count * (size * 4) - 1;
+    uSQ_VTX_CONSTANT_WORD1_0 = nVBsize - 1;
 
     SETfield(uSQ_VTX_CONSTANT_WORD2_0, 0, BASE_ADDRESS_HI_shift, BASE_ADDRESS_HI_mask); /* TODO */
-    SETfield(uSQ_VTX_CONSTANT_WORD2_0, stride, SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift,
+    SETfield(uSQ_VTX_CONSTANT_WORD2_0, pStreamDesc->stride, SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift,
              SQ_VTX_CONSTANT_WORD2_0__STRIDE_mask);
-    SETfield(uSQ_VTX_CONSTANT_WORD2_0, GetSurfaceFormat(GL_FLOAT, size, NULL),
+    SETfield(uSQ_VTX_CONSTANT_WORD2_0, GetSurfaceFormat(pStreamDesc->type, pStreamDesc->size, NULL),
              SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift,
              SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_mask); /* TODO : trace back api for initial data type, not only GL_FLOAT */
-    SETfield(uSQ_VTX_CONSTANT_WORD2_0, SQ_NUM_FORMAT_SCALED,
-             SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift, SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask);
-    SETbit(uSQ_VTX_CONSTANT_WORD2_0, SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit);
+
+    if(GL_TRUE == pStreamDesc->normalize)
+    {
+        SETfield(uSQ_VTX_CONSTANT_WORD2_0, SQ_NUM_FORMAT_NORM,
+                 SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift, SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask);
+    }
+    //else
+    //{
+    //    SETfield(uSQ_VTX_CONSTANT_WORD2_0, SQ_NUM_FORMAT_INT,
+    //             SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift, SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask);
+    //}
+
+    if(1 == pStreamDesc->_signed)
+    {
+        SETbit(uSQ_VTX_CONSTANT_WORD2_0, SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit);
+    }
 
     SETfield(uSQ_VTX_CONSTANT_WORD3_0, 1, MEM_REQUEST_SIZE_shift, MEM_REQUEST_SIZE_mask);
     SETfield(uSQ_VTX_CONSTANT_WORD6_0, SQ_TEX_VTX_VALID_BUFFER,
@@ -180,7 +215,7 @@ static void r700SetupVTXConstants(GLcontext * ctx,
     BEGIN_BATCH_NO_AUTOSTATE(9 + 2);
 
     R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
-    R600_OUT_BATCH((nStreamID + SQ_FETCH_RESOURCE_VS_OFFSET) * FETCH_RESOURCE_STRIDE);
+    R600_OUT_BATCH((pStreamDesc->element + SQ_FETCH_RESOURCE_VS_OFFSET) * FETCH_RESOURCE_STRIDE);
     R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD0_0);
     R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD1_0);
     R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD2_0);
@@ -197,38 +232,16 @@ static void r700SetupVTXConstants(GLcontext * ctx,
 }
 
-void r700SetupStreams(GLcontext *ctx)
-{
-    context_t *context = R700_CONTEXT(ctx);
-    struct r700_vertex_program *vpc
-        = (struct r700_vertex_program *)ctx->VertexProgram._Current;
-    TNLcontext *tnl = TNL_CONTEXT(ctx);
-    struct vertex_buffer *vb = &tnl->vb;
-    unsigned int i, j = 0;
-
-    R600_STATECHANGE(context, vtx);
-
-    for(i=0; i<VERT_ATTRIB_MAX; i++) {
-        if(vpc->mesa_program.Base.InputsRead & (1 << i)) {
-            rcommon_emit_vector(ctx,
-                                &context->radeon.tcl.aos[j],
-                                vb->AttribPtr[i]->data,
-                                vb->AttribPtr[i]->size,
-                                vb->AttribPtr[i]->stride,
-                                vb->Count);
-            j++;
-        }
-    }
-    context->radeon.tcl.aos_count = j;
-}
-
 static void r700SendVTXState(GLcontext *ctx, struct radeon_state_atom *atom)
 {
     context_t *context = R700_CONTEXT(ctx);
-    struct r700_vertex_program *vpc
-        = (struct r700_vertex_program *)ctx->VertexProgram._Current;
+    struct r700_vertex_program *vp = context->selected_vp;
     unsigned int i, j = 0;
     BATCH_LOCALS(&context->radeon);
+    radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+    if (context->radeon.tcl.aos_count == 0)
+        return;
 
     BEGIN_BATCH_NO_AUTOSTATE(6);
     R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 1));
@@ -242,32 +255,118 @@ static void r700SendVTXState(GLcontext *ctx, struct radeon_state_atom *atom)
     COMMIT_BATCH();
 
     for(i=0; i<VERT_ATTRIB_MAX; i++) {
-        if(vpc->mesa_program.Base.InputsRead & (1 << i)) {
-            /* currently aos are packed */
-            r700SetupVTXConstants(ctx,
-                                  i,
-                                  (void*)(&context->radeon.tcl.aos[j]),
-                                  (unsigned int)context->radeon.tcl.aos[j].components,
-                                  (unsigned int)context->radeon.tcl.aos[j].stride * 4,
-                                  (unsigned int)context->radeon.tcl.aos[j].count);
-            j++;
+        if(vp->mesa_program->Base.InputsRead & (1 << i))
+        {
+            r700SetupVTXConstants(ctx,
+                                  (void*)(&context->radeon.tcl.aos[j]),
+                                  &(context->stream_desc[j]));
+            j++;
         }
     }
 }
 
+static void r700SetRenderTarget(context_t *context, int id)
+{
+    R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+
+    struct radeon_renderbuffer *rrb;
+    unsigned int nPitchInPixel;
+
+    rrb = radeon_get_colorbuffer(&context->radeon);
+    if (!rrb || !rrb->bo) {
+        return;
+    }
+
+    R600_STATECHANGE(context, cb_target);
+
+    /* color buffer */
+    r700->render_target[id].CB_COLOR0_BASE.u32All = context->radeon.state.color.draw_offset;
+
+    nPitchInPixel = rrb->pitch/rrb->cpp;
+    SETfield(r700->render_target[id].CB_COLOR0_SIZE.u32All, (nPitchInPixel/8)-1,
+             PITCH_TILE_MAX_shift, PITCH_TILE_MAX_mask);
+    SETfield(r700->render_target[id].CB_COLOR0_SIZE.u32All, ( (nPitchInPixel * context->radeon.radeonScreen->driScreen->fbHeight)/64 )-1,
+             SLICE_TILE_MAX_shift, SLICE_TILE_MAX_mask);
+    r700->render_target[id].CB_COLOR0_BASE.u32All = 0;
+    SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, ENDIAN_NONE, ENDIAN_shift, ENDIAN_mask);
+    SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, ARRAY_LINEAR_GENERAL,
+             CB_COLOR0_INFO__ARRAY_MODE_shift, CB_COLOR0_INFO__ARRAY_MODE_mask);
+    if(4 == rrb->cpp)
+    {
+        SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, COLOR_8_8_8_8,
+                 CB_COLOR0_INFO__FORMAT_shift, CB_COLOR0_INFO__FORMAT_mask);
+        SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, SWAP_ALT, COMP_SWAP_shift, COMP_SWAP_mask);
+    }
+    else
+    {
+        SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, COLOR_5_6_5,
+                 CB_COLOR0_INFO__FORMAT_shift, CB_COLOR0_INFO__FORMAT_mask);
+        SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, SWAP_ALT_REV,
+                 COMP_SWAP_shift, COMP_SWAP_mask);
+    }
+    SETbit(r700->render_target[id].CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit);
+    SETbit(r700->render_target[id].CB_COLOR0_INFO.u32All, BLEND_CLAMP_bit);
+    SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+
+    r700->render_target[id].enabled = GL_TRUE;
+}
+
+static void r700SetDepthTarget(context_t *context)
+{
+    R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+
+    struct radeon_renderbuffer *rrb;
+    unsigned int nPitchInPixel;
+
+    rrb = radeon_get_depthbuffer(&context->radeon);
+    if (!rrb)
+        return;
+
+    R600_STATECHANGE(context, db_target);
+
+    /* depth buf */
+    r700->DB_DEPTH_SIZE.u32All = 0;
+    r700->DB_DEPTH_BASE.u32All = 0;
+    r700->DB_DEPTH_INFO.u32All = 0;
+    r700->DB_DEPTH_VIEW.u32All = 0;
+
+    nPitchInPixel = rrb->pitch/rrb->cpp;
+
+    SETfield(r700->DB_DEPTH_SIZE.u32All, (nPitchInPixel/8)-1,
+             PITCH_TILE_MAX_shift, PITCH_TILE_MAX_mask);
+    SETfield(r700->DB_DEPTH_SIZE.u32All, ( (nPitchInPixel * context->radeon.radeonScreen->driScreen->fbHeight)/64 )-1,
+             SLICE_TILE_MAX_shift, SLICE_TILE_MAX_mask); /* size in pixel / 64 - 1 */
+
+    if(4 == rrb->cpp)
+    {
+        SETfield(r700->DB_DEPTH_INFO.u32All, DEPTH_8_24,
+                 DB_DEPTH_INFO__FORMAT_shift, DB_DEPTH_INFO__FORMAT_mask);
+    }
+    else
+    {
+        SETfield(r700->DB_DEPTH_INFO.u32All, DEPTH_16,
+                 DB_DEPTH_INFO__FORMAT_shift, DB_DEPTH_INFO__FORMAT_mask);
+    }
+    SETfield(r700->DB_DEPTH_INFO.u32All, ARRAY_1D_TILED_THIN1,
+             DB_DEPTH_INFO__ARRAY_MODE_shift, DB_DEPTH_INFO__ARRAY_MODE_mask);
+    /* r700->DB_PREFETCH_LIMIT.bits.DEPTH_HEIGHT_TILE_MAX = (context->currentDraw->h >> 3) - 1; */ /* z buffer sie may much bigger than what need, so use actual used h. */
+}
+
 static void r700SendDepthTargetState(GLcontext *ctx, struct radeon_state_atom *atom)
 {
     context_t *context = R700_CONTEXT(ctx);
     R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
     struct radeon_renderbuffer *rrb;
     BATCH_LOCALS(&context->radeon);
+    radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
 
     rrb = radeon_get_depthbuffer(&context->radeon);
     if (!rrb || !rrb->bo) {
-        fprintf(stderr, "no rrb\n");
         return;
     }
 
+    r700SetDepthTarget(context);
+
     BEGIN_BATCH_NO_AUTOSTATE(8 + 2);
     R600_OUT_BATCH_REGSEQ(DB_DEPTH_SIZE, 2);
     R600_OUT_BATCH(r700->DB_DEPTH_SIZE.u32All);
@@ -300,13 +399,15 @@ static void r700SendRenderTargetState(GLcontext *ctx, struct radeon_state_atom *
     struct radeon_renderbuffer *rrb;
     BATCH_LOCALS(&context->radeon);
     int id = 0;
+    radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
 
     rrb = radeon_get_colorbuffer(&context->radeon);
     if (!rrb || !rrb->bo) {
-        fprintf(stderr, "no rrb\n");
         return;
     }
 
+    r700SetRenderTarget(context, 0);
+
     if (id > R700_MAX_RENDER_TARGETS)
         return;
 
@@ -349,6 +450,7 @@ static void r700SendPSState(GLcontext *ctx, struct radeon_state_atom *atom)
     R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
     struct radeon_bo * pbo;
     BATCH_LOCALS(&context->radeon);
+    radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
 
     pbo = (struct radeon_bo *)r700GetActiveFpShaderBo(GL_CONTEXT(context));
 
@@ -382,6 +484,7 @@ static void r700SendVSState(GLcontext *ctx, struct radeon_state_atom *atom)
     R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
     struct radeon_bo * pbo;
     BATCH_LOCALS(&context->radeon);
+    radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
 
     pbo = (struct radeon_bo *)r700GetActiveVpShaderBo(GL_CONTEXT(context));
 
@@ -413,6 +516,7 @@ static void r700SendFSState(GLcontext *ctx, struct radeon_state_atom *atom)
     R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
     struct radeon_bo * pbo;
     BATCH_LOCALS(&context->radeon);
+    radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
 
     /* XXX fixme
      * R6xx chips require a FS be emitted, even if it's not used.
@@ -454,6 +558,7 @@ static void r700SendViewportState(GLcontext *ctx, struct radeon_state_atom *atom
     R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
     BATCH_LOCALS(&context->radeon);
     int id = 0;
+    radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
 
     if (id > R700_MAX_VIEWPORTS)
         return;
@@ -486,6 +591,7 @@ static void r700SendSQConfig(GLcontext *ctx, struct radeon_state_atom *atom)
     context_t *context = R700_CONTEXT(ctx);
     R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
     BATCH_LOCALS(&context->radeon);
+    radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
 
     BEGIN_BATCH_NO_AUTOSTATE(34);
     R600_OUT_BATCH_REGSEQ(SQ_CONFIG, 6);
@@ -523,6 +629,7 @@ static void r700SendUCPState(GLcontext *ctx, struct radeon_state_atom *atom)
     R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
     BATCH_LOCALS(&context->radeon);
     int i;
+    radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
 
     for (i = 0; i < R700_MAX_UCP; i++) {
         if (r700->ucp[i].enabled) {
@@ -544,6 +651,7 @@ static void r700SendSPIState(GLcontext *ctx, struct radeon_state_atom *atom)
     R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
     BATCH_LOCALS(&context->radeon);
     unsigned int ui;
+    radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
 
     BEGIN_BATCH_NO_AUTOSTATE(59 + R700_MAX_SHADER_EXPORTS);
 
@@ -617,6 +725,7 @@ static void r700SendVGTState(GLcontext *ctx, struct radeon_state_atom *atom)
     context_t *context = R700_CONTEXT(ctx);
     R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
     BATCH_LOCALS(&context->radeon);
+    radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
 
     BEGIN_BATCH_NO_AUTOSTATE(41);
 
@@ -662,6 +771,7 @@ static void r700SendSXState(GLcontext *ctx, struct radeon_state_atom *atom)
     context_t *context = R700_CONTEXT(ctx);
     R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
     BATCH_LOCALS(&context->radeon);
+    radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
 
     BEGIN_BATCH_NO_AUTOSTATE(9);
     R600_OUT_BATCH_REGVAL(SX_MISC, r700->SX_MISC.u32All);
@@ -676,9 +786,9 @@ static void r700SendDBState(GLcontext *ctx, struct radeon_state_atom *atom)
     context_t *context = R700_CONTEXT(ctx);
     R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
     BATCH_LOCALS(&context->radeon);
+    radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
 
-    BEGIN_BATCH_NO_AUTOSTATE(23);
-    R600_OUT_BATCH_REGVAL(DB_HTILE_DATA_BASE, r700->DB_HTILE_DATA_BASE.u32All);
+    BEGIN_BATCH_NO_AUTOSTATE(17);
 
     R600_OUT_BATCH_REGSEQ(DB_STENCIL_CLEAR, 2);
     R600_OUT_BATCH(r700->DB_STENCIL_CLEAR.u32All);
@@ -691,7 +801,6 @@ static void r700SendDBState(GLcontext *ctx, struct radeon_state_atom *atom)
     R600_OUT_BATCH(r700->DB_RENDER_CONTROL.u32All);
     R600_OUT_BATCH(r700->DB_RENDER_OVERRIDE.u32All);
 
-    R600_OUT_BATCH_REGVAL(DB_HTILE_SURFACE, r700->DB_HTILE_SURFACE.u32All);
     R600_OUT_BATCH_REGVAL(DB_ALPHA_TO_MASK, r700->DB_ALPHA_TO_MASK.u32All);
 
     END_BATCH();
@@ -717,6 +826,7 @@ static void r700SendCBState(GLcontext *ctx, struct radeon_state_atom *atom)
     context_t *context = R700_CONTEXT(ctx);
     R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
     BATCH_LOCALS(&context->radeon);
+    radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
 
     if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770) {
         BEGIN_BATCH_NO_AUTOSTATE(11);
@@ -763,6 +873,7 @@ static void r700SendCBBlendState(GLcontext *ctx, struct radeon_state_atom *atom)
     R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
     BATCH_LOCALS(&context->radeon);
     unsigned int ui;
+    radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
 
     if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770) {
         BEGIN_BATCH_NO_AUTOSTATE(3);
@@ -793,6 +904,7 @@ static void r700SendCBBlendColorState(GLcontext *ctx, struct radeon_state_atom *
     context_t *context = R700_CONTEXT(ctx);
     R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
     BATCH_LOCALS(&context->radeon);
+    radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
 
     BEGIN_BATCH_NO_AUTOSTATE(6);
     R600_OUT_BATCH_REGSEQ(CB_BLEND_RED, 4);
@@ -847,6 +959,7 @@ static void r700SendCLState(GLcontext *ctx, struct radeon_state_atom *atom)
     context_t *context = R700_CONTEXT(ctx);
     R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
     BATCH_LOCALS(&context->radeon);
+    radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
 
     BEGIN_BATCH_NO_AUTOSTATE(12);
     R600_OUT_BATCH_REGVAL(PA_CL_CLIP_CNTL, r700->PA_CL_CLIP_CNTL.u32All);
@@ -878,6 +991,7 @@ static void r700SendScissorState(GLcontext *ctx, struct radeon_state_atom *atom)
     context_t *context = R700_CONTEXT(ctx);
     R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
     BATCH_LOCALS(&context->radeon);
+    radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
 
     BEGIN_BATCH_NO_AUTOSTATE(22);
     R600_OUT_BATCH_REGSEQ(PA_SC_SCREEN_SCISSOR_TL, 2);
@@ -910,6 +1024,7 @@ static void r700SendSCState(GLcontext *ctx, struct radeon_state_atom *atom)
     context_t *context = R700_CONTEXT(ctx);
     R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
     BATCH_LOCALS(&context->radeon);
+    radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
 
     BEGIN_BATCH_NO_AUTOSTATE(15);
     R600_OUT_BATCH_REGVAL(R7xx_PA_SC_EDGERULE, r700->PA_SC_EDGERULE.u32All);
@@ -966,6 +1081,7 @@ static void r700SendVSConsts(GLcontext *ctx, struct radeon_state_atom *atom)
     R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
     int i;
     BATCH_LOCALS(&context->radeon);
+    radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
 
     if (r700->vs.num_consts == 0)
         return;
@@ -984,16 +1100,95 @@ static void r700SendVSConsts(GLcontext *ctx, struct radeon_state_atom *atom)
     COMMIT_BATCH();
 }
 
+static void r700SendQueryBegin(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+    radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+    struct radeon_query_object *query = radeon->query.current;
+    BATCH_LOCALS(radeon);
+    radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+    /* clear the buffer */
+    radeon_bo_map(query->bo, GL_FALSE);
+    memset(query->bo->ptr, 0, 4 * 2 * sizeof(uint64_t)); /* 4 DBs, 2 qwords each */
+    radeon_bo_unmap(query->bo);
+
+    radeon_cs_space_check_with_bo(radeon->cmdbuf.cs,
+                                  query->bo,
+                                  0, RADEON_GEM_DOMAIN_GTT);
+
+    BEGIN_BATCH_NO_AUTOSTATE(4 + 2);
+    R600_OUT_BATCH(CP_PACKET3(R600_IT_EVENT_WRITE, 2));
+    R600_OUT_BATCH(ZPASS_DONE);
+    R600_OUT_BATCH(query->curr_offset); /* hw writes qwords */
+    R600_OUT_BATCH(0x00000000);
+    R600_OUT_BATCH_RELOC(VGT_EVENT_INITIATOR, query->bo, 0, 0, RADEON_GEM_DOMAIN_GTT, 0);
+    END_BATCH();
+    query->emitted_begin = GL_TRUE;
+}
+
 static int check_always(GLcontext *ctx, struct radeon_state_atom *atom)
 {
     return atom->cmd_size;
 }
 
+static int check_cb(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+    context_t *context = R700_CONTEXT(ctx);
+    int count = 7;
+
+    if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
+        count += 11;
+    radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count);
+
+    return count;
+}
+
+static int check_blnd(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+    context_t *context = R700_CONTEXT(ctx);
+    R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+    unsigned int ui;
+    int count = 3;
+
+    if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
+        count += 3;
+
+    if (context->radeon.radeonScreen->chip_family > CHIP_FAMILY_R600) {
+        for (ui = 0; ui < R700_MAX_RENDER_TARGETS; ui++) {
+            if (r700->render_target[ui].enabled)
+                count += 3;
+        }
+    }
+    radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count);
+
+    return count;
+}
+
+static int check_ucp(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+    context_t *context = R700_CONTEXT(ctx);
+    R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+    int i;
+    int count = 0;
+
+    for (i = 0; i < R700_MAX_UCP; i++) {
+        if (r700->ucp[i].enabled)
+            count += 6;
+    }
+    radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count);
+    return count;
+}
+
 static int check_vtx(GLcontext *ctx, struct radeon_state_atom *atom)
 {
     context_t *context = R700_CONTEXT(ctx);
+    int count = context->radeon.tcl.aos_count * 18;
+
+    if (count)
+        count += 6;
 
-    return context->radeon.tcl.aos_count * 18;
+    radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count);
+    return count;
 }
 
 static int check_tx(GLcontext *ctx, struct radeon_state_atom *atom)
@@ -1003,10 +1198,13 @@ static int check_tx(GLcontext *ctx, struct radeon_state_atom *atom)
     R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
 
     for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) {
-        radeonTexObj *t = r700->textures[i];
-        if (t)
-            count++;
+        if (ctx->Texture.Unit[i]._ReallyEnabled) {
+            radeonTexObj *t = r700->textures[i];
+            if (t)
+                count++;
+        }
     }
+    radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count);
 
     return count * 31;
 }
@@ -1014,16 +1212,40 @@ static int check_ps_consts(GLcontext *ctx, struct radeon_state_atom *atom)
 {
     context_t *context = R700_CONTEXT(ctx);
     R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+    int count = r700->ps.num_consts * 4;
+
+    if (count)
+        count += 2;
+    radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count);
 
-    return 2 + (r700->ps.num_consts * 4);
+    return count;
 }
 
 static int check_vs_consts(GLcontext *ctx, struct radeon_state_atom *atom)
 {
     context_t *context = R700_CONTEXT(ctx);
     R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+    int count = r700->vs.num_consts * 4;
+
+    if (count)
+        count += 2;
+    radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count);
 
-    return 2 + (r700->vs.num_consts * 4);
+    return count;
+}
+
+static int check_queryobj(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+    radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+    struct radeon_query_object *query = radeon->query.current;
+    int count;
+
+    if (!query || query->emitted_begin)
+        count = 0;
+    else
+        count = atom->cmd_size;
+    radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count);
+    return count;
 }
 
 #define ALLOC_STATE( ATOM, CHK, SZ, EMIT ) \
@@ -1039,8 +1261,22 @@ do { \
     insert_at_tail(&context->radeon.hw.atomlist, &context->atoms.ATOM); \
 } while (0)
 
+static void r600_init_query_stateobj(radeonContextPtr radeon, int SZ)
+{
+    radeon->query.queryobj.cmd_size = (SZ);
+    radeon->query.queryobj.cmd = NULL;
+    radeon->query.queryobj.name = "queryobj";
+    radeon->query.queryobj.idx = 0;
+    radeon->query.queryobj.check = check_queryobj;
+    radeon->query.queryobj.dirty = GL_FALSE;
+    radeon->query.queryobj.emit = r700SendQueryBegin;
+    radeon->hw.max_state_size += (SZ);
+    insert_at_tail(&radeon->hw.atomlist, &radeon->query.queryobj);
+}
+
 void r600InitAtoms(context_t *context)
 {
+    radeon_print(RADEON_STATE, RADEON_NORMAL, "%s %p\n", __func__, context);
     context->radeon.hw.max_state_size = 10 + 5 + 14; /* start 3d, idle, cb/db flush */
 
     /* Setup the atom linked list */
@@ -1048,7 +1284,7 @@ void r600InitAtoms(context_t *context)
     context->radeon.hw.atomlist.name = "atom-list";
 
     ALLOC_STATE(sq, always, 34, r700SendSQConfig);
-    ALLOC_STATE(db, always, 23, r700SendDBState);
+    ALLOC_STATE(db, always, 17, r700SendDBState);
     ALLOC_STATE(stencil, always, 4, r700SendStencilState);
     ALLOC_STATE(db_target, always, 12, r700SendDepthTargetState);
     ALLOC_STATE(sc, always, 15, r700SendSCState);
@@ -1056,12 +1292,12 @@ void r600InitAtoms(context_t *context)
     ALLOC_STATE(aa, always, 12, r700SendAAState);
     ALLOC_STATE(cl, always, 12, r700SendCLState);
     ALLOC_STATE(gb, always, 6, r700SendGBState);
-    ALLOC_STATE(ucp, always, 36, r700SendUCPState);
+    ALLOC_STATE(ucp, ucp, (R700_MAX_UCP * 6), r700SendUCPState);
     ALLOC_STATE(su, always, 9, r700SendSUState);
     ALLOC_STATE(poly, always, 10, r700SendPolyState);
-    ALLOC_STATE(cb, always, 18, r700SendCBState);
+    ALLOC_STATE(cb, cb, 18, r700SendCBState);
     ALLOC_STATE(clrcmp, always, 6, r700SendCBCLRCMPState);
-    ALLOC_STATE(blnd, always, 30, r700SendCBBlendState);
+    ALLOC_STATE(blnd, blnd, (6 + (R700_MAX_RENDER_TARGETS * 3)), r700SendCBBlendState);
     ALLOC_STATE(blnd_clr, always, 6, r700SendCBBlendColorState);
     ALLOC_STATE(cb_target, always, 25, r700SendRenderTargetState);
     ALLOC_STATE(sx, always, 9, r700SendSXState);
@@ -1073,10 +1309,11 @@ void r600InitAtoms(context_t *context)
     ALLOC_STATE(ps, always, 21, r700SendPSState);
     ALLOC_STATE(vs_consts, vs_consts, (2 + (R700_MAX_DX9_CONSTS * 4)), r700SendVSConsts);
     ALLOC_STATE(ps_consts, ps_consts, (2 + (R700_MAX_DX9_CONSTS * 4)), r700SendPSConsts);
-    ALLOC_STATE(vtx, vtx, (VERT_ATTRIB_MAX * 18), r700SendVTXState);
+    ALLOC_STATE(vtx, vtx, (6 + (VERT_ATTRIB_MAX * 18)), r700SendVTXState);
     ALLOC_STATE(tx, tx, (R700_TEXTURE_NUMBERUNITS * 20), r700SendTexState);
     ALLOC_STATE(tx_smplr, tx, (R700_TEXTURE_NUMBERUNITS * 5), r700SendTexSamplerState);
     ALLOC_STATE(tx_brdr_clr, tx, (R700_TEXTURE_NUMBERUNITS * 6), r700SendTexBorderColorState);
+    r600_init_query_stateobj(&context->radeon, 6 * 2);
 
     context->radeon.hw.is_dirty = GL_TRUE;
     context->radeon.hw.all_dirty = GL_TRUE;