radeonsi: don't lower indirect IO in GLSL

[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_format_yuv.c
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c b/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c

index 2bce2895551fb04e710141975d089f649230ec35..121452d75966a2e5229a5ba5ea955bfc554eff38 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c
@@ -34,7 +34,7 @@
   */
  
  
-#include "util/u_format.h"
+#include "util/format/u_format.h"
  #include "util/u_cpu_detect.h"
  
  #include "lp_bld_arit.h"
@@ -43,6 +43,7 @@
  #include "lp_bld_conv.h"
  #include "lp_bld_gather.h"
  #include "lp_bld_format.h"
+#include "lp_bld_init.h"
  #include "lp_bld_logic.h"
  
  /**
@@ -51,7 +52,7 @@
   * @param i  is a <n x i32> vector with the x pixel coordinate (0 or 1)
   */
  static void
-uyvy_to_yuv_soa(LLVMBuilderRef builder,
+uyvy_to_yuv_soa(struct gallivm_state *gallivm,
                  unsigned n,
                  LLVMValueRef packed,
                  LLVMValueRef i,
@@ -59,6 +60,7 @@ uyvy_to_yuv_soa(LLVMBuilderRef builder,
                  LLVMValueRef *u,
                  LLVMValueRef *v)
  {
+   LLVMBuilderRef builder = gallivm->builder;
     struct lp_type type;
     LLVMValueRef mask;
  
@@ -70,9 +72,15 @@ uyvy_to_yuv_soa(LLVMBuilderRef builder,
     assert(lp_check_value(type, i));
  
     /*
+    * Little endian:
      * y = (uyvy >> (16*i + 8)) & 0xff
      * u = (uyvy        ) & 0xff
      * v = (uyvy >> 16  ) & 0xff
+    *
+    * Big endian:
+    * y = (uyvy >> (-16*i + 16)) & 0xff
+    * u = (uyvy >> 24) & 0xff
+    * v = (uyvy >>  8) & 0xff
      */
  
  #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
@@ -82,29 +90,39 @@ uyvy_to_yuv_soa(LLVMBuilderRef builder,
      * per element. Didn't measure performance but cuts shader size
      * by quite a bit (less difference if cpu has no sse4.1 support).
      */
-   if (util_cpu_caps.has_sse2 && n == 4) {
+   if (util_cpu_caps.has_sse2 && n > 1) {
        LLVMValueRef sel, tmp, tmp2;
        struct lp_build_context bld32;
  
-      lp_build_context_init(&bld32, builder, type);
+      lp_build_context_init(&bld32, gallivm, type);
  
-      tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 8), "");
-      tmp2 = LLVMBuildLShr(builder, tmp, lp_build_const_int_vec(type, 16), "");
-      sel = lp_build_compare(builder, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(type, 0));
+      tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), "");
+      tmp2 = LLVMBuildLShr(builder, tmp, lp_build_const_int_vec(gallivm, type, 16), "");
+      sel = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(gallivm, type, 0));
        *y = lp_build_select(&bld32, sel, tmp, tmp2);
     } else
  #endif
     {
        LLVMValueRef shift;
-      shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(type, 16), "");
-      shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(type, 8), "");
+#if UTIL_ARCH_LITTLE_ENDIAN
+      shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), "");
+      shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(gallivm, type, 8), "");
+#else
+      shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, -16), "");
+      shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(gallivm, type, 16), "");
+#endif
        *y = LLVMBuildLShr(builder, packed, shift, "");
     }
  
+#if UTIL_ARCH_LITTLE_ENDIAN
     *u = packed;
-   *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 16), "");
+   *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), "");
+#else
+   *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 24), "");
+   *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), "");
+#endif
  
-   mask = lp_build_const_int_vec(type, 0xff);
+   mask = lp_build_const_int_vec(gallivm, type, 0xff);
  
     *y = LLVMBuildAnd(builder, *y, mask, "y");
     *u = LLVMBuildAnd(builder, *u, mask, "u");
@@ -118,7 +136,7 @@ uyvy_to_yuv_soa(LLVMBuilderRef builder,
   * @param i  is a <n x i32> vector with the x pixel coordinate (0 or 1)
   */
  static void
-yuyv_to_yuv_soa(LLVMBuilderRef builder,
+yuyv_to_yuv_soa(struct gallivm_state *gallivm,
                  unsigned n,
                  LLVMValueRef packed,
                  LLVMValueRef i,
@@ -126,6 +144,7 @@ yuyv_to_yuv_soa(LLVMBuilderRef builder,
                  LLVMValueRef *u,
                  LLVMValueRef *v)
  {
+   LLVMBuilderRef builder = gallivm->builder;
     struct lp_type type;
     LLVMValueRef mask;
  
@@ -137,9 +156,15 @@ yuyv_to_yuv_soa(LLVMBuilderRef builder,
     assert(lp_check_value(type, i));
  
     /*
+    * Little endian:
      * y = (yuyv >> 16*i) & 0xff
      * u = (yuyv >> 8   ) & 0xff
      * v = (yuyv >> 24  ) & 0xff
+    *
+    * Big endian:
+    * y = (yuyv >> (-16*i + 24)) & 0xff
+    * u = (yuyv >> 16)           & 0xff
+    * v = (yuyv)                 & 0xff
      */
  
  #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
@@ -149,27 +174,37 @@ yuyv_to_yuv_soa(LLVMBuilderRef builder,
      * per element. Didn't measure performance but cuts shader size
      * by quite a bit (less difference if cpu has no sse4.1 support).
      */
-   if (util_cpu_caps.has_sse2 && n == 4) {
+   if (util_cpu_caps.has_sse2 && n > 1) {
        LLVMValueRef sel, tmp;
        struct lp_build_context bld32;
  
-      lp_build_context_init(&bld32, builder, type);
+      lp_build_context_init(&bld32, gallivm, type);
  
-      tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 16), "");
-      sel = lp_build_compare(builder, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(type, 0));
+      tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), "");
+      sel = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(gallivm, type, 0));
         *y = lp_build_select(&bld32, sel, packed, tmp);
     } else
  #endif
     {
        LLVMValueRef shift;
-      shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(type, 16), "");
+#if UTIL_ARCH_LITTLE_ENDIAN
+      shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), "");
+#else
+      shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, -16), "");
+      shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(gallivm, type, 24), "");
+#endif
        *y = LLVMBuildLShr(builder, packed, shift, "");
     }
  
-   *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 8), "");
-   *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 24), "");
+#if UTIL_ARCH_LITTLE_ENDIAN
+   *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), "");
+   *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 24), "");
+#else
+   *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), "");
+   *v = packed;
+#endif
  
-   mask = lp_build_const_int_vec(type, 0xff);
+   mask = lp_build_const_int_vec(gallivm, type, 0xff);
  
     *y = LLVMBuildAnd(builder, *y, mask, "y");
     *u = LLVMBuildAnd(builder, *u, mask, "u");
@@ -177,12 +212,13 @@ yuyv_to_yuv_soa(LLVMBuilderRef builder,
  }
  
  
-static INLINE void
-yuv_to_rgb_soa(LLVMBuilderRef builder,
+static inline void
+yuv_to_rgb_soa(struct gallivm_state *gallivm,
                 unsigned n,
                 LLVMValueRef y, LLVMValueRef u, LLVMValueRef v,
                 LLVMValueRef *r, LLVMValueRef *g, LLVMValueRef *b)
  {
+   LLVMBuilderRef builder = gallivm->builder;
     struct lp_type type;
     struct lp_build_context bld;
  
@@ -203,7 +239,7 @@ yuv_to_rgb_soa(LLVMBuilderRef builder,
     type.width = 32;
     type.length = n;
  
-   lp_build_context_init(&bld, builder, type);
+   lp_build_context_init(&bld, gallivm, type);
  
     assert(lp_check_value(type, y));
     assert(lp_check_value(type, u));
@@ -213,17 +249,17 @@ yuv_to_rgb_soa(LLVMBuilderRef builder,
      * Constants
      */
  
-   c0   = lp_build_const_int_vec(type,   0);
-   c8   = lp_build_const_int_vec(type,   8);
-   c16  = lp_build_const_int_vec(type,  16);
-   c128 = lp_build_const_int_vec(type, 128);
-   c255 = lp_build_const_int_vec(type, 255);
+   c0   = lp_build_const_int_vec(gallivm, type,   0);
+   c8   = lp_build_const_int_vec(gallivm, type,   8);
+   c16  = lp_build_const_int_vec(gallivm, type,  16);
+   c128 = lp_build_const_int_vec(gallivm, type, 128);
+   c255 = lp_build_const_int_vec(gallivm, type, 255);
  
-   cy  = lp_build_const_int_vec(type,  298);
-   cug = lp_build_const_int_vec(type, -100);
-   cub = lp_build_const_int_vec(type,  516);
-   cvr = lp_build_const_int_vec(type,  409);
-   cvg = lp_build_const_int_vec(type, -208);
+   cy  = lp_build_const_int_vec(gallivm, type,  298);
+   cug = lp_build_const_int_vec(gallivm, type, -100);
+   cub = lp_build_const_int_vec(gallivm, type,  516);
+   cvr = lp_build_const_int_vec(gallivm, type,  409);
+   cvg = lp_build_const_int_vec(gallivm, type, -208);
  
     /*
      *  y -= 16;
@@ -276,10 +312,11 @@ yuv_to_rgb_soa(LLVMBuilderRef builder,
  
  
  static LLVMValueRef
-rgb_to_rgba_aos(LLVMBuilderRef builder,
+rgb_to_rgba_aos(struct gallivm_state *gallivm,
                  unsigned n,
                  LLVMValueRef r, LLVMValueRef g, LLVMValueRef b)
  {
+   LLVMBuilderRef builder = gallivm->builder;
     struct lp_type type;
     LLVMValueRef a;
     LLVMValueRef rgba;
@@ -297,10 +334,17 @@ rgb_to_rgba_aos(LLVMBuilderRef builder,
      * Make a 4 x unorm8 vector
      */
  
+#if UTIL_ARCH_LITTLE_ENDIAN
     r = r;
-   g = LLVMBuildShl(builder, g, lp_build_const_int_vec(type, 8), "");
-   b = LLVMBuildShl(builder, b, lp_build_const_int_vec(type, 16), "");
-   a = lp_build_const_int_vec(type, 0xff000000);
+   g = LLVMBuildShl(builder, g, lp_build_const_int_vec(gallivm, type, 8), "");
+   b = LLVMBuildShl(builder, b, lp_build_const_int_vec(gallivm, type, 16), "");
+   a = lp_build_const_int_vec(gallivm, type, 0xff000000);
+#else
+   r = LLVMBuildShl(builder, r, lp_build_const_int_vec(gallivm, type, 24), "");
+   g = LLVMBuildShl(builder, g, lp_build_const_int_vec(gallivm, type, 16), "");
+   b = LLVMBuildShl(builder, b, lp_build_const_int_vec(gallivm, type, 8), "");
+   a = lp_build_const_int_vec(gallivm, type, 0x000000ff);
+#endif
  
     rgba = r;
     rgba = LLVMBuildOr(builder, rgba, g, "");
@@ -308,7 +352,7 @@ rgb_to_rgba_aos(LLVMBuilderRef builder,
     rgba = LLVMBuildOr(builder, rgba, a, "");
  
     rgba = LLVMBuildBitCast(builder, rgba,
-                           LLVMVectorType(LLVMInt8Type(), 4*n), "");
+                           LLVMVectorType(LLVMInt8TypeInContext(gallivm->context), 4*n), "");
  
     return rgba;
  }
@@ -318,7 +362,7 @@ rgb_to_rgba_aos(LLVMBuilderRef builder,
   * Convert from <n x i32> packed UYVY to <4n x i8> RGBA AoS
   */
  static LLVMValueRef
-uyvy_to_rgba_aos(LLVMBuilderRef builder,
+uyvy_to_rgba_aos(struct gallivm_state *gallivm,
                   unsigned n,
                   LLVMValueRef packed,
                   LLVMValueRef i)
@@ -327,9 +371,9 @@ uyvy_to_rgba_aos(LLVMBuilderRef builder,
     LLVMValueRef r, g, b;
     LLVMValueRef rgba;
  
-   uyvy_to_yuv_soa(builder, n, packed, i, &y, &u, &v);
-   yuv_to_rgb_soa(builder, n, y, u, v, &r, &g, &b);
-   rgba = rgb_to_rgba_aos(builder, n, r, g, b);
+   uyvy_to_yuv_soa(gallivm, n, packed, i, &y, &u, &v);
+   yuv_to_rgb_soa(gallivm, n, y, u, v, &r, &g, &b);
+   rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
  
     return rgba;
  }
@@ -339,7 +383,7 @@ uyvy_to_rgba_aos(LLVMBuilderRef builder,
   * Convert from <n x i32> packed YUYV to <4n x i8> RGBA AoS
   */
  static LLVMValueRef
-yuyv_to_rgba_aos(LLVMBuilderRef builder,
+yuyv_to_rgba_aos(struct gallivm_state *gallivm,
                   unsigned n,
                   LLVMValueRef packed,
                   LLVMValueRef i)
@@ -348,9 +392,9 @@ yuyv_to_rgba_aos(LLVMBuilderRef builder,
     LLVMValueRef r, g, b;
     LLVMValueRef rgba;
  
-   yuyv_to_yuv_soa(builder, n, packed, i, &y, &u, &v);
-   yuv_to_rgb_soa(builder, n, y, u, v, &r, &g, &b);
-   rgba = rgb_to_rgba_aos(builder, n, r, g, b);
+   yuyv_to_yuv_soa(gallivm, n, packed, i, &y, &u, &v);
+   yuv_to_rgb_soa(gallivm, n, y, u, v, &r, &g, &b);
+   rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
  
     return rgba;
  }
@@ -360,7 +404,7 @@ yuyv_to_rgba_aos(LLVMBuilderRef builder,
   * Convert from <n x i32> packed RG_BG to <4n x i8> RGBA AoS
   */
  static LLVMValueRef
-rgbg_to_rgba_aos(LLVMBuilderRef builder,
+rgbg_to_rgba_aos(struct gallivm_state *gallivm,
                   unsigned n,
                   LLVMValueRef packed,
                   LLVMValueRef i)
@@ -368,8 +412,8 @@ rgbg_to_rgba_aos(LLVMBuilderRef builder,
     LLVMValueRef r, g, b;
     LLVMValueRef rgba;
  
-   uyvy_to_yuv_soa(builder, n, packed, i, &g, &r, &b);
-   rgba = rgb_to_rgba_aos(builder, n, r, g, b);
+   uyvy_to_yuv_soa(gallivm, n, packed, i, &g, &r, &b);
+   rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
  
     return rgba;
  }
@@ -379,7 +423,7 @@ rgbg_to_rgba_aos(LLVMBuilderRef builder,
   * Convert from <n x i32> packed GR_GB to <4n x i8> RGBA AoS
   */
  static LLVMValueRef
-grgb_to_rgba_aos(LLVMBuilderRef builder,
+grgb_to_rgba_aos(struct gallivm_state *gallivm,
                   unsigned n,
                   LLVMValueRef packed,
                   LLVMValueRef i)
@@ -387,13 +431,49 @@ grgb_to_rgba_aos(LLVMBuilderRef builder,
     LLVMValueRef r, g, b;
     LLVMValueRef rgba;
  
-   yuyv_to_yuv_soa(builder, n, packed, i, &g, &r, &b);
-   rgba = rgb_to_rgba_aos(builder, n, r, g, b);
+   yuyv_to_yuv_soa(gallivm, n, packed, i, &g, &r, &b);
+   rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
+
+   return rgba;
+}
+
+/**
+ * Convert from <n x i32> packed GR_BR (dispatched for PIPE_FORMAT_G8R8_B8R8_UNORM) to <4n x i8> RGBA AoS; i is the per-element x pixel coordinate (0 or 1) handed to the unpacker
+ */
+static LLVMValueRef
+grbr_to_rgba_aos(struct gallivm_state *gallivm,
+                 unsigned n,
+                 LLVMValueRef packed,
+                 LLVMValueRef i)
+{
+   LLVMValueRef r, g, b;
+   LLVMValueRef rgba;
+
+   uyvy_to_yuv_soa(gallivm, n, packed, i, &r, &g, &b);   /* reuse the UYVY unpacker: its y/u/v outputs are taken as r/g/b */
+   rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);   /* pack r/g/b together with a constant 0xff alpha byte */
  
     return rgba;
  }
  
  
+/**
+ * Convert from <n x i32> packed RG_RB (dispatched for PIPE_FORMAT_R8G8_R8B8_UNORM) to <4n x i8> RGBA AoS; i is the per-element x pixel coordinate (0 or 1) handed to the unpacker
+ */
+static LLVMValueRef
+rgrb_to_rgba_aos(struct gallivm_state *gallivm,
+                 unsigned n,
+                 LLVMValueRef packed,
+                 LLVMValueRef i)
+{
+   LLVMValueRef r, g, b;
+   LLVMValueRef rgba;
+
+   yuyv_to_yuv_soa(gallivm, n, packed, i, &r, &g, &b);   /* reuse the YUYV unpacker: its y/u/v outputs are taken as r/g/b */
+   rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);   /* pack r/g/b together with a constant 0xff alpha byte */
+
+   return rgba;
+}
+
  /**
   * @param n  is the number of pixels processed
   * @param packed  is a <n x i32> vector with the packed YUYV blocks
@@ -401,7 +481,7 @@ grgb_to_rgba_aos(LLVMBuilderRef builder,
   * @return  a <4*n x i8> vector with the pixel RGBA values in AoS
   */
  LLVMValueRef
-lp_build_fetch_subsampled_rgba_aos(LLVMBuilderRef builder,
+lp_build_fetch_subsampled_rgba_aos(struct gallivm_state *gallivm,
                                     const struct util_format_description *format_desc,
                                     unsigned n,
                                     LLVMValueRef base_ptr,
@@ -411,32 +491,40 @@ lp_build_fetch_subsampled_rgba_aos(LLVMBuilderRef builder,
  {
     LLVMValueRef packed;
     LLVMValueRef rgba;
+   struct lp_type fetch_type;
  
     assert(format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED);
     assert(format_desc->block.bits == 32);
     assert(format_desc->block.width == 2);
     assert(format_desc->block.height == 1);
  
-   packed = lp_build_gather(builder, n, 32, 32, base_ptr, offset);
+   fetch_type = lp_type_uint(32);
+   packed = lp_build_gather(gallivm, n, 32, fetch_type, TRUE, base_ptr, offset, FALSE);
  
     (void)j;
  
     switch (format_desc->format) {
     case PIPE_FORMAT_UYVY:
-      rgba = uyvy_to_rgba_aos(builder, n, packed, i);
+      rgba = uyvy_to_rgba_aos(gallivm, n, packed, i);
        break;
     case PIPE_FORMAT_YUYV:
-      rgba = yuyv_to_rgba_aos(builder, n, packed, i);
+      rgba = yuyv_to_rgba_aos(gallivm, n, packed, i);
        break;
     case PIPE_FORMAT_R8G8_B8G8_UNORM:
-      rgba = rgbg_to_rgba_aos(builder, n, packed, i);
+      rgba = rgbg_to_rgba_aos(gallivm, n, packed, i);
        break;
     case PIPE_FORMAT_G8R8_G8B8_UNORM:
-      rgba = grgb_to_rgba_aos(builder, n, packed, i);
+      rgba = grgb_to_rgba_aos(gallivm, n, packed, i);
+      break;
+   case PIPE_FORMAT_G8R8_B8R8_UNORM:
+      rgba = grbr_to_rgba_aos(gallivm, n, packed, i);
+      break;
+   case PIPE_FORMAT_R8G8_R8B8_UNORM:
+      rgba = rgrb_to_rgba_aos(gallivm, n, packed, i);
        break;
     default:
        assert(0);
-      rgba =  LLVMGetUndef(LLVMVectorType(LLVMInt8Type(), 4*n));
+      rgba =  LLVMGetUndef(LLVMVectorType(LLVMInt8TypeInContext(gallivm->context), 4*n));
        break;
     }