X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fr300%2Fr300_swtcl.c;h=ee2c71e1a7f81382afc0801f0ce5d224d3ee5555;hb=3594b53c0173ac810106f667604bf94b5cfc4a1e;hp=153582ce4894d19627c8afd8242ebee680bbbe36;hpb=e0313ef061c2988cc9df9b8a016ba06fd2ba7ce7;p=mesa.git diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c index 153582ce489..ee2c71e1a7f 100644 --- a/src/mesa/drivers/dri/r300/r300_swtcl.c +++ b/src/mesa/drivers/dri/r300/r300_swtcl.c @@ -28,43 +28,24 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* * Authors: * Dave Airlie + * Maciej Cencora */ -/* derived from r200 swtcl path */ - - - -#include "main/glheader.h" -#include "main/mtypes.h" -#include "main/colormac.h" -#include "main/enums.h" -#include "main/image.h" -#include "main/imports.h" -#include "main/light.h" -#include "main/macros.h" - -#include "swrast/s_context.h" -#include "swrast/s_fog.h" -#include "swrast_setup/swrast_setup.h" -#include "math/m_translate.h" #include "tnl/tnl.h" -#include "tnl/t_context.h" #include "tnl/t_pipeline.h" -#include "r300_context.h" -#include "r300_swtcl.h" #include "r300_state.h" -#include "r300_ioctl.h" +#include "r300_swtcl.h" #include "r300_emit.h" #include "r300_tex.h" +#include "r300_render.h" +#include "main/simple_list.h" -void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, struct radeon_bo *bo, GLuint offset); -void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr); #define EMIT_ATTR( ATTR, STYLE ) \ do { \ - rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = (ATTR); \ - rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = (STYLE); \ - rmesa->radeon.swtcl.vertex_attr_count++; \ + rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = (ATTR); \ + rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = (STYLE); \ + rmesa->radeon.swtcl.vertex_attr_count++; \ } while (0) #define EMIT_PAD( N ) \ @@ -75,162 +56,184 @@ do { \ rmesa->radeon.swtcl.vertex_attr_count++; \ } while (0) -static void r300SetVertexFormat( GLcontext *ctx ) +#define ADD_ATTR(_attr, _format, _dst_loc, _swizzle, _write_mask, _normalize) \ +do { \ + attrs[num_attrs].element = (_attr); \ + attrs[num_attrs].data_type = (_format); \ + attrs[num_attrs].dst_loc = (_dst_loc); \ + attrs[num_attrs].swizzle = (_swizzle); \ + attrs[num_attrs].write_mask = (_write_mask); \ + attrs[num_attrs]._signed = 0; \ + attrs[num_attrs].normalize = (_normalize); \ + ++num_attrs; \ +} while (0) + +void r300ChooseSwtclVertexFormat(GLcontext *ctx, GLuint *_InputsRead, GLuint *_OutputsWritten) { r300ContextPtr rmesa = R300_CONTEXT( ctx ); TNLcontext *tnl = TNL_CONTEXT(ctx); struct vertex_buffer *VB = &tnl->vb; - DECLARE_RENDERINPUTS(index_bitset); - GLuint InputsRead = 0, OutputsWritten = 0; - int vap_fmt_0 = 0; - int offset = 0; - int vte = 0; - GLint inputs[VERT_ATTRIB_MAX]; - GLint tab[VERT_ATTRIB_MAX]; - int swizzle[VERT_ATTRIB_MAX][4]; - GLuint i, nr; - GLuint sz, vap_fmt_1 = 0; - - DECLARE_RENDERINPUTS(render_inputs_bitset); - RENDERINPUTS_COPY(render_inputs_bitset, tnl->render_inputs_bitset); - RENDERINPUTS_COPY( index_bitset, tnl->render_inputs_bitset ); - RENDERINPUTS_COPY(rmesa->state.render_inputs_bitset, render_inputs_bitset); - - vte = rmesa->hw.vte.cmd[1]; - vte &= ~(R300_VTX_XY_FMT | R300_VTX_Z_FMT | R300_VTX_W0_FMT); - /* Important: - */ - if ( VB->NdcPtr != NULL ) { - VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr; - vte |= R300_VTX_XY_FMT | R300_VTX_Z_FMT; - } - else { - VB->AttribPtr[VERT_ATTRIB_POS] = VB->ClipPtr; - vte |= R300_VTX_W0_FMT; - } - - assert( VB->AttribPtr[VERT_ATTRIB_POS] != NULL ); + int first_free_tex = 0; + GLuint InputsRead = 0; + GLuint OutputsWritten = 0; + int num_attrs = 0; + GLuint fp_reads = rmesa->selected_fp->InputsRead; + struct vertex_attribute *attrs = rmesa->vbuf.attribs; + + radeon_print(RADEON_SWRENDER, RADEON_VERBOSE, "%s\n", __func__); + rmesa->swtcl.coloroffset = rmesa->swtcl.specoffset = 0; rmesa->radeon.swtcl.vertex_attr_count = 0; - /* EMIT_ATTR's must be in order as they tell t_vertex.c how to - * build up a hardware vertex. - */ - if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_POS)) { - sz = VB->AttribPtr[VERT_ATTRIB_POS]->size; - InputsRead |= 1 << VERT_ATTRIB_POS; - OutputsWritten |= 1 << VERT_RESULT_HPOS; - EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_1F + sz - 1 ); - offset = sz; - } else { - offset = 4; - EMIT_PAD(4 * sizeof(float)); - } + if (RADEON_DEBUG & RADEON_VERTS) + fprintf(stderr, "%s\n", __func__); - if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_POINTSIZE )) { - EMIT_ATTR( _TNL_ATTRIB_POINTSIZE, EMIT_1F ); - vap_fmt_0 |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT; - offset += 1; - } + /* We always want non Ndc coords format */ + VB->AttribPtr[VERT_ATTRIB_POS] = VB->ClipPtr; + + /* Always write position vector */ + InputsRead |= 1 << VERT_ATTRIB_POS; + OutputsWritten |= 1 << VERT_RESULT_HPOS; + EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F ); + ADD_ATTR(VERT_ATTRIB_POS, R300_DATA_TYPE_FLOAT_4, SWTCL_OVM_POS, SWIZZLE_XYZW, MASK_XYZW, 0); + rmesa->swtcl.coloroffset = 4; - if (RENDERINPUTS_TEST(index_bitset, _TNL_ATTRIB_COLOR0)) { - sz = VB->AttribPtr[VERT_ATTRIB_COLOR0]->size; - rmesa->swtcl.coloroffset = offset; + if (fp_reads & FRAG_BIT_COL0) { InputsRead |= 1 << VERT_ATTRIB_COLOR0; OutputsWritten |= 1 << VERT_RESULT_COL0; - EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_1F + sz - 1 ); - offset += sz; +#if MESA_LITTLE_ENDIAN + EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_RGBA ); + ADD_ATTR(VERT_ATTRIB_COLOR0, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR0, SWIZZLE_XYZW, MASK_XYZW, 1); +#else + EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_ABGR ); + ADD_ATTR(VERT_ATTRIB_COLOR0, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR0, SWIZZLE_XYZW, MASK_XYZW, 1); +#endif } - rmesa->swtcl.specoffset = 0; - if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 )) { - sz = VB->AttribPtr[VERT_ATTRIB_COLOR1]->size; - rmesa->swtcl.specoffset = offset; - EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_1F + sz - 1 ); + if (fp_reads & FRAG_BIT_COL1) { + GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE); InputsRead |= 1 << VERT_ATTRIB_COLOR1; OutputsWritten |= 1 << VERT_RESULT_COL1; +#if MESA_LITTLE_ENDIAN + EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_4UB_4F_RGBA ); + ADD_ATTR(VERT_ATTRIB_COLOR1, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR1, swiz, MASK_XYZW, 1); +#else + EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_4UB_4F_ABGR ); + ADD_ATTR(VERT_ATTRIB_COLOR1, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR1, swiz, MASK_XYZW, 1); +#endif + rmesa->swtcl.specoffset = rmesa->swtcl.coloroffset + 1; } - if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) { - int i; - - for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { - if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX(i) )) { - sz = VB->TexCoordPtr[i]->size; - InputsRead |= 1 << (VERT_ATTRIB_TEX0 + i); - OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i); - EMIT_ATTR( _TNL_ATTRIB_TEX0+i, EMIT_1F + sz - 1 ); - vap_fmt_1 |= sz << (3 * i); - } + if (ctx->Light.Enabled && ctx->Light.Model.TwoSide) { + VB->AttribPtr[VERT_ATTRIB_GENERIC0] = VB->ColorPtr[1]; + OutputsWritten |= 1 << VERT_RESULT_BFC0; +#if MESA_LITTLE_ENDIAN + EMIT_ATTR( _TNL_ATTRIB_GENERIC0, EMIT_4UB_4F_RGBA ); + ADD_ATTR(VERT_ATTRIB_GENERIC0, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR2, SWIZZLE_XYZW, MASK_XYZW, 1); +#else + EMIT_ATTR( _TNL_ATTRIB_GENERIC0, EMIT_4UB_4F_ABGR ); + ADD_ATTR(VERT_ATTRIB_GENERIC0, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR2, SWIZZLE_XYZW, MASK_XYZW, 1); +#endif + if (fp_reads & FRAG_BIT_COL1) { + VB->AttribPtr[VERT_ATTRIB_GENERIC1] = VB->SecondaryColorPtr[1]; + GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE); + OutputsWritten |= 1 << VERT_RESULT_BFC1; +#if MESA_LITTLE_ENDIAN + EMIT_ATTR( _TNL_ATTRIB_GENERIC1, EMIT_4UB_4F_RGBA ); + ADD_ATTR(VERT_ATTRIB_GENERIC1, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR3, swiz, MASK_XYZW, 1); +#else + EMIT_ATTR( _TNL_ATTRIB_GENERIC1, EMIT_4UB_4F_ABGR ); + ADD_ATTR(VERT_ATTRIB_GENERIC1, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR3, swiz, MASK_XYZW, 1); +#endif } } - for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) { - if (InputsRead & (1 << i)) { - inputs[i] = nr++; - } else { - inputs[i] = -1; - } + if (RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_POINTSIZE )) { + GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO); + InputsRead |= 1 << VERT_ATTRIB_POINT_SIZE; + OutputsWritten |= 1 << VERT_RESULT_PSIZ; + EMIT_ATTR( _TNL_ATTRIB_POINTSIZE, EMIT_1F ); + ADD_ATTR(VERT_ATTRIB_POINT_SIZE, R300_DATA_TYPE_FLOAT_1, SWTCL_OVM_POINT_SIZE, swiz, MASK_X, 0); } - /* Fixed, apply to vir0 only */ - if (InputsRead & (1 << VERT_ATTRIB_POS)) - inputs[VERT_ATTRIB_POS] = 0; - if (InputsRead & (1 << VERT_ATTRIB_COLOR0)) - inputs[VERT_ATTRIB_COLOR0] = 2; - if (InputsRead & (1 << VERT_ATTRIB_COLOR1)) - inputs[VERT_ATTRIB_COLOR1] = 3; - for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++) - if (InputsRead & (1 << i)) - inputs[i] = 6 + (i - VERT_ATTRIB_TEX0); - - for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) { - if (InputsRead & (1 << i)) { - tab[nr++] = i; - } + if (rmesa->selected_fp->wpos_attr != FRAG_ATTRIB_MAX) { + int tex_id = rmesa->selected_fp->wpos_attr - FRAG_ATTRIB_TEX0; + + VB->AttribPtr[VERT_ATTRIB_TEX0 + tex_id] = VB->AttribPtr[VERT_ATTRIB_POS]; + VB->TexCoordPtr[tex_id] = VB->AttribPtr[VERT_ATTRIB_POS]; + RENDERINPUTS_SET(tnl->render_inputs_bitset, _TNL_ATTRIB_TEX0 + tex_id); } - for (i = 0; i < nr; i++) { - int ci; + if (rmesa->selected_fp->fog_attr != FRAG_ATTRIB_MAX) { + int tex_id = rmesa->selected_fp->fog_attr - FRAG_ATTRIB_TEX0; - swizzle[i][0] = SWIZZLE_ZERO; - swizzle[i][1] = SWIZZLE_ZERO; - swizzle[i][2] = SWIZZLE_ZERO; - swizzle[i][3] = SWIZZLE_ONE; + VB->AttribPtr[VERT_ATTRIB_TEX0 + tex_id] = VB->AttribPtr[VERT_ATTRIB_FOG]; + VB->TexCoordPtr[tex_id] = VB->AttribPtr[VERT_ATTRIB_FOG]; + RENDERINPUTS_SET(tnl->render_inputs_bitset, _TNL_ATTRIB_TEX0 + tex_id); + } - for (ci = 0; ci < VB->AttribPtr[tab[i]]->size; ci++) { - swizzle[i][ci] = ci; + /** + * Sending only one texcoord component may lead to lock up, + * so for all textures always output 4 texcoord components to RS. + */ + { + int i; + GLuint swiz, format, hw_format; + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { + if (fp_reads & FRAG_BIT_TEX(i)) { + switch (VB->TexCoordPtr[i]->size) { + case 1: + format = EMIT_1F; + hw_format = R300_DATA_TYPE_FLOAT_1; + swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE); + break; + case 2: + format = EMIT_2F; + hw_format = R300_DATA_TYPE_FLOAT_2; + swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_ZERO, SWIZZLE_ONE); + break; + case 3: + format = EMIT_3F; + hw_format = R300_DATA_TYPE_FLOAT_3; + swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE); + break; + case 4: + format = EMIT_4F; + hw_format = R300_DATA_TYPE_FLOAT_4; + swiz = SWIZZLE_XYZW; + break; + default: + continue; + } + InputsRead |= 1 << (VERT_ATTRIB_TEX0 + i); + OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i); + EMIT_ATTR(_TNL_ATTRIB_TEX(i), format); + ADD_ATTR(VERT_ATTRIB_TEX0 + i, hw_format, SWTCL_OVM_TEX(first_free_tex), swiz, MASK_XYZW, 0); + ++first_free_tex; + } } } - R300_NEWPRIM(rmesa); - if (rmesa->radeon.radeonScreen->kernel_mm) { - R300_STATECHANGE(rmesa, vir[0]); - rmesa->hw.vir[0].cmd[0] &= 0xC000FFFF; - rmesa->hw.vir[1].cmd[0] &= 0xC000FFFF; - rmesa->hw.vir[0].cmd[0] |= - (r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0], - VB->AttribPtr, inputs, tab, nr) & 0x3FFF) << 16; - R300_STATECHANGE(rmesa, vir[1]); - rmesa->hw.vir[1].cmd[0] |= - (r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle, - nr) & 0x3FFF) << 16; - } else { - R300_STATECHANGE(rmesa, vir[0]); - ((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count = - r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0], - VB->AttribPtr, inputs, tab, nr); - R300_STATECHANGE(rmesa, vir[1]); - ((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count = - r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle, - nr); + if (first_free_tex >= ctx->Const.MaxTextureUnits) { + fprintf(stderr, "\tout of free texcoords to write fog coordinate\n"); + _mesa_exit(-1); } - R300_STATECHANGE(rmesa, vic); - rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead); - rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead); - R300_STATECHANGE(rmesa, vof); - rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = r300VAPOutputCntl0(ctx, OutputsWritten); - rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = vap_fmt_1; + R300_NEWPRIM(rmesa); + rmesa->vbuf.num_attribs = num_attrs; + *_InputsRead = InputsRead; + *_OutputsWritten = OutputsWritten; + + RENDERINPUTS_COPY(rmesa->render_inputs_bitset, tnl->render_inputs_bitset); +} + +static void r300PrepareVertices(GLcontext *ctx) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + GLuint InputsRead, OutputsWritten; + radeon_print(RADEON_SWRENDER, RADEON_TRACE, "%s\n", __func__); + + r300ChooseSwtclVertexFormat(ctx, &InputsRead, &OutputsWritten); + r300SetupVAP(ctx, InputsRead, OutputsWritten); rmesa->radeon.swtcl.vertex_size = _tnl_install_attrs( ctx, @@ -239,31 +242,50 @@ static void r300SetVertexFormat( GLcontext *ctx ) NULL, 0 ); rmesa->radeon.swtcl.vertex_size /= 4; +} - RENDERINPUTS_COPY( rmesa->tnl_index_bitset, index_bitset ); - - - R300_STATECHANGE(rmesa, vte); - rmesa->hw.vte.cmd[1] = vte; - rmesa->hw.vte.cmd[2] = rmesa->radeon.swtcl.vertex_size; +static void r300_predict_emit_size( r300ContextPtr rmesa ) +{ + if (!rmesa->radeon.swtcl.emit_prediction) { + const int vertex_size = 7; + const int prim_size = 3; + const int cache_flush_size = 4; + const int pre_emit_state = 4; + const int scissor_size = 3; + const int state_size = radeonCountStateEmitSize(&rmesa->radeon); + + if (rcommonEnsureCmdBufSpace(&rmesa->radeon, + state_size + pre_emit_state + scissor_size + + vertex_size + prim_size + cache_flush_size * 2, + __FUNCTION__)) + rmesa->radeon.swtcl.emit_prediction = radeonCountStateEmitSize(&rmesa->radeon); + else + rmesa->radeon.swtcl.emit_prediction = state_size; + + rmesa->radeon.swtcl.emit_prediction += rmesa->radeon.cmdbuf.cs->cdw + + vertex_size + scissor_size + prim_size + cache_flush_size * 2 + pre_emit_state; + radeon_print(RADEON_SWRENDER, RADEON_VERBOSE, + "%s, size %d\n", + __func__, rmesa->radeon.cmdbuf.cs->cdw + + vertex_size + scissor_size + prim_size + cache_flush_size * 2 + pre_emit_state); + } } + static GLuint reduced_prim[] = { - GL_POINTS, - GL_LINES, - GL_LINES, - GL_LINES, - GL_TRIANGLES, - GL_TRIANGLES, - GL_TRIANGLES, - GL_TRIANGLES, - GL_TRIANGLES, - GL_TRIANGLES, + GL_POINTS, + GL_LINES, + GL_LINES, + GL_LINES, + GL_TRIANGLES, + GL_TRIANGLES, + GL_TRIANGLES, + GL_TRIANGLES, + GL_TRIANGLES, + GL_TRIANGLES, }; static void r300RasterPrimitive( GLcontext *ctx, GLuint prim ); -static void r300RenderPrimitive( GLcontext *ctx, GLenum prim ); -//static void r300ResetLineStipple( GLcontext *ctx ); /*********************************************************************** * Emit primitives as inline vertices * @@ -282,18 +304,26 @@ static void r300RenderPrimitive( GLcontext *ctx, GLenum prim ); #define HAVE_POLYGONS 1 #define HAVE_ELTS 1 +static void* r300_alloc_verts(r300ContextPtr rmesa, GLuint n, GLuint size) +{ + void *rv; + do { + r300_predict_emit_size( rmesa ); + rv = rcommonAllocDmaLowVerts( &rmesa->radeon, n, size * 4 ); + } while (!rv); + return rv; +} + #undef LOCAL_VARS #undef ALLOC_VERTS #define CTX_ARG r300ContextPtr rmesa #define GET_VERTEX_DWORDS() rmesa->radeon.swtcl.vertex_size -#define ALLOC_VERTS( n, size ) rcommonAllocDmaLowVerts( &rmesa->radeon, n, size * 4 ) +#define ALLOC_VERTS( n, size ) r300_alloc_verts(rmesa, n, size); #define LOCAL_VARS \ r300ContextPtr rmesa = R300_CONTEXT(ctx); \ const char *r300verts = (char *)rmesa->radeon.swtcl.verts; #define VERT(x) (r300Vertex *)(r300verts + ((x) * vertsize * sizeof(int))) #define VERTEX r300Vertex -#define DO_DEBUG_VERTS (1 && (RADEON_DEBUG & DEBUG_VERTS)) -#define PRINT_VERTEX(x) #undef TAG #define TAG(x) r300_##x #include "tnl_dd/t_dd_triemit.h" @@ -313,9 +343,8 @@ static void r300RenderPrimitive( GLcontext *ctx, GLenum prim ); * Build render functions from dd templates * ***********************************************************************/ -#define R300_TWOSIDE_BIT 0x01 -#define R300_UNFILLED_BIT 0x02 -#define R300_MAX_TRIFUNC 0x04 +#define R300_UNFILLED_BIT 0x01 +#define R300_MAX_TRIFUNC 0x02 static struct { tnl_points_func points; @@ -326,9 +355,9 @@ static struct { #define DO_FALLBACK 0 #define DO_UNFILLED (IND & R300_UNFILLED_BIT) -#define DO_TWOSIDE (IND & R300_TWOSIDE_BIT) +#define DO_TWOSIDE 0 #define DO_FLAT 0 -#define DO_OFFSET 0 +#define DO_OFFSET 0 #define DO_TRI 1 #define DO_QUAD 1 #define DO_LINE 1 @@ -350,31 +379,37 @@ static struct { #define AREA_IS_CCW( a ) (a < 0) #define GET_VERTEX(e) (rmesa->radeon.swtcl.verts + (e*rmesa->radeon.swtcl.vertex_size*sizeof(int))) -/* Only used to pull back colors into vertices (ie, we know color is - * floating point). - */ -#define R300_COLOR( dst, src ) \ -do { \ - UNCLAMPED_FLOAT_TO_UBYTE((dst)[0], (src)[2]); \ - UNCLAMPED_FLOAT_TO_UBYTE((dst)[1], (src)[1]); \ - UNCLAMPED_FLOAT_TO_UBYTE((dst)[2], (src)[0]); \ - UNCLAMPED_FLOAT_TO_UBYTE((dst)[3], (src)[3]); \ +#define VERT_SET_RGBA( v, c ) \ +do { \ + r300_color_t *color = (r300_color_t *)&((v)->ui[coloroffset]); \ + UNCLAMPED_FLOAT_TO_UBYTE(color->red, (c)[0]); \ + UNCLAMPED_FLOAT_TO_UBYTE(color->green, (c)[1]); \ + UNCLAMPED_FLOAT_TO_UBYTE(color->blue, (c)[2]); \ + UNCLAMPED_FLOAT_TO_UBYTE(color->alpha, (c)[3]); \ } while (0) -#define VERT_SET_RGBA( v, c ) if (coloroffset) R300_COLOR( v->ub4[coloroffset], c ) -#define VERT_COPY_RGBA( v0, v1 ) if (coloroffset) v0->ui[coloroffset] = v1->ui[coloroffset] -#define VERT_SAVE_RGBA( idx ) if (coloroffset) color[idx] = v[idx]->ui[coloroffset] -#define VERT_RESTORE_RGBA( idx ) if (coloroffset) v[idx]->ui[coloroffset] = color[idx] +#define VERT_COPY_RGBA( v0, v1 ) v0->ui[coloroffset] = v1->ui[coloroffset] -#define R300_SPEC( dst, src ) \ -do { \ - UNCLAMPED_FLOAT_TO_UBYTE((dst)[0], (src)[2]); \ - UNCLAMPED_FLOAT_TO_UBYTE((dst)[1], (src)[1]); \ - UNCLAMPED_FLOAT_TO_UBYTE((dst)[2], (src)[0]); \ +#define VERT_SET_SPEC( v0, c ) \ +do { \ + if (specoffset) { \ + UNCLAMPED_FLOAT_TO_UBYTE(v0->v.specular.red, (c)[0]); \ + UNCLAMPED_FLOAT_TO_UBYTE(v0->v.specular.green, (c)[1]); \ + UNCLAMPED_FLOAT_TO_UBYTE(v0->v.specular.blue, (c)[2]); \ + } \ } while (0) -#define VERT_SET_SPEC( v, c ) if (specoffset) R300_SPEC( v->ub4[specoffset], c ) -#define VERT_COPY_SPEC( v0, v1 ) if (specoffset) COPY_3V(v0->ub4[specoffset], v1->ub4[specoffset]) +#define VERT_COPY_SPEC( v0, v1 ) \ +do { \ + if (specoffset) { \ + v0->v.specular.red = v1->v.specular.red; \ + v0->v.specular.green = v1->v.specular.green; \ + v0->v.specular.blue = v1->v.specular.blue; \ + } \ +} while (0) + +#define VERT_SAVE_RGBA( idx ) color[idx] = v[idx]->ui[coloroffset] +#define VERT_RESTORE_RGBA( idx ) v[idx]->ui[coloroffset] = color[idx] #define VERT_SAVE_SPEC( idx ) if (specoffset) spec[idx] = v[idx]->ui[specoffset] #define VERT_RESTORE_SPEC( idx ) if (specoffset) v[idx]->ui[specoffset] = spec[idx] @@ -384,7 +419,7 @@ do { \ #define LOCAL_VARS(n) \ r300ContextPtr rmesa = R300_CONTEXT(ctx); \ - GLuint color[n], spec[n]; \ + GLuint color[n] = { 0, }, spec[n] = { 0, }; \ GLuint coloroffset = rmesa->swtcl.coloroffset; \ GLuint specoffset = rmesa->swtcl.specoffset; \ (void) color; (void) spec; (void) coloroffset; (void) specoffset; @@ -410,26 +445,15 @@ do { \ #define TAG(x) x #include "tnl_dd/t_dd_tritmp.h" -#define IND (R300_TWOSIDE_BIT) -#define TAG(x) x##_twoside -#include "tnl_dd/t_dd_tritmp.h" - #define IND (R300_UNFILLED_BIT) #define TAG(x) x##_unfilled #include "tnl_dd/t_dd_tritmp.h" -#define IND (R300_TWOSIDE_BIT|R300_UNFILLED_BIT) -#define TAG(x) x##_twoside_unfilled -#include "tnl_dd/t_dd_tritmp.h" - - static void init_rast_tab( void ) { init(); - init_twoside(); init_unfilled(); - init_twoside_unfilled(); } /**********************************************************************/ @@ -480,8 +504,8 @@ static void r300ChooseRenderState( GLcontext *ctx ) r300ContextPtr rmesa = R300_CONTEXT(ctx); GLuint index = 0; GLuint flags = ctx->_TriangleCaps; + radeon_print(RADEON_SWRENDER, RADEON_VERBOSE, "%s\n", __func__); - if (flags & DD_TRI_LIGHT_TWOSIDE) index |= R300_TWOSIDE_BIT; if (flags & DD_TRI_UNFILLED) index |= R300_UNFILLED_BIT; if (index != rmesa->radeon.swtcl.RenderIndex) { @@ -505,58 +529,60 @@ static void r300ChooseRenderState( GLcontext *ctx ) } } - -static void r300RenderStart(GLcontext *ctx) +void r300RenderStart(GLcontext *ctx) { - r300ContextPtr rmesa = R300_CONTEXT( ctx ); - // fprintf(stderr, "%s\n", __FUNCTION__); + radeon_print(RADEON_SWRENDER, RADEON_VERBOSE, "%s\n", __func__); + r300ContextPtr rmesa = R300_CONTEXT( ctx ); r300ChooseRenderState(ctx); - r300SetVertexFormat(ctx); + + r300UpdateShaders(rmesa); + + r300PrepareVertices(ctx); r300ValidateBuffers(ctx); - r300UpdateShaders(rmesa); r300UpdateShaderStates(rmesa); - r300EmitCacheFlush(rmesa); + + /* investigate if we can put back flush optimisation if needed */ if (rmesa->radeon.dma.flush != NULL) { rmesa->radeon.dma.flush(ctx); } } -static void r300RenderFinish(GLcontext *ctx) +void r300RenderFinish(GLcontext *ctx) { } static void r300RasterPrimitive( GLcontext *ctx, GLuint hwprim ) { r300ContextPtr rmesa = R300_CONTEXT(ctx); + radeon_print(RADEON_SWRENDER, RADEON_TRACE, "%s\n", __func__); if (rmesa->radeon.swtcl.hw_primitive != hwprim) { - R300_NEWPRIM( rmesa ); + R300_NEWPRIM( rmesa ); rmesa->radeon.swtcl.hw_primitive = hwprim; } } -static void r300RenderPrimitive(GLcontext *ctx, GLenum prim) +void r300RenderPrimitive(GLcontext *ctx, GLenum prim) { r300ContextPtr rmesa = R300_CONTEXT(ctx); rmesa->radeon.swtcl.render_primitive = prim; + radeon_print(RADEON_SWRENDER, RADEON_TRACE, "%s\n", __func__); if ((prim == GL_TRIANGLES) && (ctx->_TriangleCaps & DD_TRI_UNFILLED)) - return; + return; r300RasterPrimitive( ctx, reduced_prim[prim] ); - // fprintf(stderr, "%s\n", __FUNCTION__); - } -static void r300ResetLineStipple(GLcontext *ctx) +void r300ResetLineStipple(GLcontext *ctx) { - - + if (RADEON_DEBUG & RADEON_VERTS) + fprintf(stderr, "%s\n", __func__); } void r300InitSwtcl(GLcontext *ctx) @@ -564,11 +590,13 @@ void r300InitSwtcl(GLcontext *ctx) TNLcontext *tnl = TNL_CONTEXT(ctx); r300ContextPtr rmesa = R300_CONTEXT(ctx); static int firsttime = 1; + radeon_print(RADEON_SWRENDER, RADEON_NORMAL, "%s\n", __func__); if (firsttime) { init_rast_tab(); firsttime = 0; } + rmesa->radeon.swtcl.emit_prediction = 0; tnl->Driver.Render.Start = r300RenderStart; tnl->Driver.Render.Finish = r300RenderFinish; @@ -589,27 +617,20 @@ void r300InitSwtcl(GLcontext *ctx) _tnl_invalidate_vertex_state( ctx, ~0 ); _tnl_invalidate_vertices( ctx, ~0 ); - RENDERINPUTS_ZERO( rmesa->tnl_index_bitset ); _tnl_need_projected_coords( ctx, GL_FALSE ); - r300ChooseRenderState(ctx); - - _mesa_validate_all_lighting_tables( ctx ); - - tnl->Driver.NotifyMaterialChange = - _mesa_validate_all_lighting_tables; } void r300DestroySwtcl(GLcontext *ctx) { } -void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, struct radeon_bo *bo, GLuint offset) +static void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, struct radeon_bo *bo, GLuint offset) { BATCH_LOCALS(&rmesa->radeon); - if (RADEON_DEBUG & DEBUG_VERTS) - fprintf(stderr, "%s: vertex_size %d, offset 0x%x \n", + radeon_print(RADEON_SWRENDER, RADEON_TRACE, + "%s: vertex_size %d, offset 0x%x \n", __FUNCTION__, vertex_size, offset); BEGIN_BATCH(7); @@ -620,10 +641,12 @@ void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, struct radeon_b END_BATCH(); } -void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr) +static void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr) { BATCH_LOCALS(&rmesa->radeon); int type, num_verts; + if (RADEON_DEBUG & RADEON_VERTS) + fprintf(stderr, "%s\n", __func__); type = r300PrimitiveType(rmesa, primitive); num_verts = r300NumVerts(rmesa, vertex_nr, primitive); @@ -636,21 +659,26 @@ void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr) void r300_swtcl_flush(GLcontext *ctx, uint32_t current_offset) { - r300ContextPtr rmesa = R300_CONTEXT(ctx); - - rcommonEnsureCmdBufSpace(&rmesa->radeon, - rmesa->radeon.hw.max_state_size + (12*sizeof(int)), - __FUNCTION__); - radeonEmitState(&rmesa->radeon); - r300EmitVertexAOS(rmesa, - rmesa->radeon.swtcl.vertex_size, - rmesa->radeon.dma.current, - current_offset); - - r300EmitVbufPrim(rmesa, + radeon_print(RADEON_SWRENDER, RADEON_TRACE, "%s\n", __func__); + r300ContextPtr rmesa = R300_CONTEXT(ctx); + + r300EmitCacheFlush(rmesa); + + radeonEmitState(&rmesa->radeon); + r300_emit_scissor(ctx); + r300EmitVertexAOS(rmesa, + rmesa->radeon.swtcl.vertex_size, + first_elem(&rmesa->radeon.dma.reserved)->bo, + current_offset); + + r300EmitVbufPrim(rmesa, rmesa->radeon.swtcl.hw_primitive, rmesa->radeon.swtcl.numverts); - r300EmitCacheFlush(rmesa); - COMMIT_BATCH(); - + r300EmitCacheFlush(rmesa); + if ( rmesa->radeon.swtcl.emit_prediction < rmesa->radeon.cmdbuf.cs->cdw ) + WARN_ONCE("Rendering was %d commands larger than predicted size." + " We might overflow command buffer.\n", + rmesa->radeon.cmdbuf.cs->cdw - rmesa->radeon.swtcl.emit_prediction ); + rmesa->radeon.swtcl.emit_prediction = 0; + COMMIT_BATCH(); }