X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Ftnl%2Ft_vertex_sse.c;h=93189656f74733c4d92d7a2327fc4eef2663c3bf;hb=f0c8e7c32766cb78756de24a9ca5e8c28017a682;hp=a180441a5a67eb2c4e6f5e26305a68ad1f461da6;hpb=62767cf2dd1006621ecd6023b15d65b5cff41dfa;p=mesa.git diff --git a/src/mesa/tnl/t_vertex_sse.c b/src/mesa/tnl/t_vertex_sse.c index a180441a5a6..93189656f74 100644 --- a/src/mesa/tnl/t_vertex_sse.c +++ b/src/mesa/tnl/t_vertex_sse.c @@ -25,13 +25,13 @@ * Keith Whitwell */ -#include "glheader.h" -#include "context.h" -#include "colormac.h" +#include "main/glheader.h" +#include "main/context.h" +#include "main/colormac.h" +#include "main/simple_list.h" +#include "main/enums.h" #include "t_context.h" #include "t_vertex.h" -#include "simple_list.h" -#include "enums.h" #if defined(USE_SSE_ASM) @@ -39,6 +39,12 @@ #include "x86/common_x86_asm.h" +/** + * Number of bytes to allocate for generated SSE functions + */ +#define MAX_SSE_CODE_SIZE 1024 + + #define X 0 #define Y 1 #define Z 2 @@ -48,7 +54,7 @@ struct x86_program { struct x86_function func; - GLcontext *ctx; + struct gl_context *ctx; GLboolean inputs_safe; GLboolean outputs_safe; GLboolean have_sse2; @@ -140,7 +146,8 @@ static void emit_load3f_1( struct x86_program *p, struct x86_reg dest, struct x86_reg arg0 ) { - emit_load4f_1(p, dest, arg0); + /* Loading from memory erases the upper bits. */ + sse_movss(&p->func, dest, arg0); } static void emit_load2f_2( struct x86_program *p, @@ -154,7 +161,8 @@ static void emit_load2f_1( struct x86_program *p, struct x86_reg dest, struct x86_reg arg0 ) { - emit_load4f_1(p, dest, arg0); + /* Loading from memory erases the upper bits. */ + sse_movss(&p->func, dest, arg0); } static void emit_load1f_1( struct x86_program *p, @@ -334,7 +342,7 @@ static void update_src_ptr( struct x86_program *p, */ static GLboolean build_vertex_emit( struct x86_program *p ) { - GLcontext *ctx = p->ctx; + struct gl_context *ctx = p->ctx; TNLcontext *tnl = TNL_CONTEXT(ctx); struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx); GLuint j = 0; @@ -346,6 +354,7 @@ static GLboolean build_vertex_emit( struct x86_program *p ) struct x86_reg temp = x86_make_reg(file_XMM, 0); struct x86_reg vp0 = x86_make_reg(file_XMM, 1); struct x86_reg vp1 = x86_make_reg(file_XMM, 2); + struct x86_reg temp2 = x86_make_reg(file_XMM, 3); GLubyte *fixup, *label; /* Push a few regs? @@ -487,7 +496,7 @@ static GLboolean build_vertex_emit( struct x86_program *p ) update_src_ptr(p, srcECX, vtxESI, a); } else { - _mesa_printf("Can't emit 1ub %x %x %d\n", a->vertoffset, a[-1].vertoffset, a[-1].vertattrsize ); + printf("Can't emit 1ub %x %x %d\n", a->vertoffset, a[-1].vertoffset, a[-1].vertattrsize ); return GL_FALSE; } break; @@ -518,7 +527,8 @@ static GLboolean build_vertex_emit( struct x86_program *p ) sse_shufps(&p->func, temp, temp, SHUF(W,X,Y,Z)); get_src_ptr(p, srcECX, vtxESI, &a[1]); - emit_load(p, temp, 1, x86_deref(srcECX), a[1].inputsize); + emit_load(p, temp2, 1, x86_deref(srcECX), a[1].inputsize); + sse_movss(&p->func, temp, temp2); update_src_ptr(p, srcECX, vtxESI, &a[1]); /* Rearrange and possibly do BGR conversion: @@ -532,9 +542,9 @@ static GLboolean build_vertex_emit( struct x86_program *p ) j++; /* NOTE: two attrs consumed */ } else { - _mesa_printf("Can't emit 3ub\n"); + printf("Can't emit 3ub\n"); + return GL_FALSE; /* add this later */ } - return GL_FALSE; /* add this later */ break; case EMIT_4UB_4F_RGBA: @@ -580,12 +590,12 @@ static GLboolean build_vertex_emit( struct x86_program *p ) break; case GL_UNSIGNED_SHORT: default: - _mesa_printf("unknown CHAN_TYPE %s\n", _mesa_lookup_enum_by_nr(CHAN_TYPE)); + printf("unknown CHAN_TYPE %s\n", _mesa_lookup_enum_by_nr(CHAN_TYPE)); return GL_FALSE; } break; default: - _mesa_printf("unknown a[%d].format %d\n", j, a->format); + printf("unknown a[%d].format %d\n", j, a->format); return GL_FALSE; /* catch any new opcodes */ } @@ -619,13 +629,16 @@ static GLboolean build_vertex_emit( struct x86_program *p ) x86_pop(&p->func, countEBP); x86_ret(&p->func); + assert(!vtx->emit); vtx->emit = (tnl_emit_func)x86_get_func(&p->func); + + assert( (char *) p->func.csr - (char *) p->func.store <= MAX_SSE_CODE_SIZE ); return GL_TRUE; } -void _tnl_generate_sse_emit( GLcontext *ctx ) +void _tnl_generate_sse_emit( struct gl_context *ctx ) { struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx); struct x86_program p; @@ -635,7 +648,7 @@ void _tnl_generate_sse_emit( GLcontext *ctx ) return; } - _mesa_memset(&p, 0, sizeof(p)); + memset(&p, 0, sizeof(p)); p.ctx = ctx; p.inputs_safe = 0; /* for now */ @@ -644,7 +657,10 @@ void _tnl_generate_sse_emit( GLcontext *ctx ) p.identity = x86_make_reg(file_XMM, 6); p.chan0 = x86_make_reg(file_XMM, 7); - x86_init_func(&p.func); + if (!x86_init_func_size(&p.func, MAX_SSE_CODE_SIZE)) { + vtx->emit = NULL; + return; + } if (build_vertex_emit(&p)) { _tnl_register_fastpath( vtx, GL_TRUE ); @@ -660,7 +676,7 @@ void _tnl_generate_sse_emit( GLcontext *ctx ) #else -void _tnl_generate_sse_emit( GLcontext *ctx ) +void _tnl_generate_sse_emit( struct gl_context *ctx ) { /* Dummy version for when USE_SSE_ASM not defined */ }