translate_sse: add R32G32B32A32_FLOAT -> X8X8X8X8_UNORM for EMIT_4UB
author: Jakob Bornecrantz <wallbraker@gmail.com>
Sun, 22 Aug 2010 17:58:57 +0000 (19:58 +0200)
committer: Luca Barbieri <luca@luca-barbieri.com>
Sun, 22 Aug 2010 18:30:16 +0000 (20:30 +0200)
Changed by me to use movd instead of movss to avoid penalties.

src/gallium/auxiliary/translate/translate_sse.c

index 3fcd120ed13f9287a1f2fa3c413d971618b14abe..5d555bbd98c8c1a9eb1aa06b72acc1eb717b7f09 100644 (file)
@@ -1011,6 +1011,32 @@ static boolean translate_attr_convert( struct translate_sse *p,
       }
       return TRUE;
    }
+   /* special case for draw's EMIT_4UB (RGBA) and EMIT_4UB_BGRA */
+   else if((x86_target_caps(p->func) & X86_SSE2) &&
+         a->input_format == PIPE_FORMAT_R32G32B32A32_FLOAT && (0
+               || a->output_format == PIPE_FORMAT_B8G8R8A8_UNORM
+               || a->output_format == PIPE_FORMAT_R8G8B8A8_UNORM
+         ))
+   {
+      struct x86_reg dataXMM = x86_make_reg(file_XMM, 0);
+
+      /* load */
+      sse_movups(p->func, dataXMM, src);
+
+      if (a->output_format == PIPE_FORMAT_B8G8R8A8_UNORM)
+         sse_shufps(p->func, dataXMM, dataXMM, SHUF(2,1,0,3));
+
+      /* scale by 255.0 */
+      sse_mulps(p->func, dataXMM, get_const(p, CONST_255));
+
+      /* pack and emit */
+      sse2_cvtps2dq(p->func, dataXMM, dataXMM);
+      sse2_packssdw(p->func, dataXMM, dataXMM);
+      sse2_packuswb(p->func, dataXMM, dataXMM);
+      sse2_movd(p->func, dst, dataXMM);
+
+      return TRUE;
+   }
 
    return FALSE;
 }