From 8e632666af494219c77072056e8ca0e9cd09f5fa Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Sun, 22 Aug 2010 19:58:57 +0200 Subject: [PATCH] translate_sse: add R32G32B32A32_FLOAT -> X8X8X8X8_UNORM for EMIT_4UB Changed by me to use movd instead of movss to avoid penalties. --- .../auxiliary/translate/translate_sse.c | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c index 3fcd120ed13..5d555bbd98c 100644 --- a/src/gallium/auxiliary/translate/translate_sse.c +++ b/src/gallium/auxiliary/translate/translate_sse.c @@ -1011,6 +1011,32 @@ static boolean translate_attr_convert( struct translate_sse *p, } return TRUE; } + /* special case for draw's EMIT_4UB (RGBA) and EMIT_4UB_BGRA */ + else if((x86_target_caps(p->func) & X86_SSE2) && + a->input_format == PIPE_FORMAT_R32G32B32A32_FLOAT && (0 + || a->output_format == PIPE_FORMAT_B8G8R8A8_UNORM + || a->output_format == PIPE_FORMAT_R8G8B8A8_UNORM + )) + { + struct x86_reg dataXMM = x86_make_reg(file_XMM, 0); + + /* load */ + sse_movups(p->func, dataXMM, src); + + if (a->output_format == PIPE_FORMAT_B8G8R8A8_UNORM) + sse_shufps(p->func, dataXMM, dataXMM, SHUF(2,1,0,3)); + + /* scale by 255.0 */ + sse_mulps(p->func, dataXMM, get_const(p, CONST_255)); + + /* pack and emit */ + sse2_cvtps2dq(p->func, dataXMM, dataXMM); + sse2_packssdw(p->func, dataXMM, dataXMM); + sse2_packuswb(p->func, dataXMM, dataXMM); + sse2_movd(p->func, dst, dataXMM); + + return TRUE; + } return FALSE; } -- 2.30.2