gallium: replace INLINE with inline

[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_format_yuv.c
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c b/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c

index ccc832070045db201eab84cf703b938644db0ad0..fa0e8b656bb133fd2613cc54785a392165ccee34 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c
@@ -72,9 +72,15 @@ uyvy_to_yuv_soa(struct gallivm_state *gallivm,
     assert(lp_check_value(type, i));
  
     /*
+    * Little endian:
      * y = (uyvy >> (16*i + 8)) & 0xff
      * u = (uyvy        ) & 0xff
      * v = (uyvy >> 16  ) & 0xff
+    *
+    * Big endian:
+    * y = (uyvy >> (-16*i + 16)) & 0xff
+    * u = (uyvy >> 24) & 0xff
+    * v = (uyvy >>  8) & 0xff
      */
  
  #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
@@ -84,7 +90,7 @@ uyvy_to_yuv_soa(struct gallivm_state *gallivm,
      * per element. Didn't measure performance but cuts shader size
      * by quite a bit (less difference if cpu has no sse4.1 support).
      */
-   if (util_cpu_caps.has_sse2 && n == 4) {
+   if (util_cpu_caps.has_sse2 && n > 1) {
        LLVMValueRef sel, tmp, tmp2;
        struct lp_build_context bld32;
  
@@ -98,13 +104,23 @@ uyvy_to_yuv_soa(struct gallivm_state *gallivm,
  #endif
     {
        LLVMValueRef shift;
+#ifdef PIPE_ARCH_LITTLE_ENDIAN
        shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), "");
        shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(gallivm, type, 8), "");
+#else
+      shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, -16), "");
+      shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(gallivm, type, 16), "");
+#endif
        *y = LLVMBuildLShr(builder, packed, shift, "");
     }
  
+#ifdef PIPE_ARCH_LITTLE_ENDIAN
     *u = packed;
     *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), "");
+#else
+   *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 24), "");
+   *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), "");
+#endif
  
     mask = lp_build_const_int_vec(gallivm, type, 0xff);
  
@@ -140,9 +156,15 @@ yuyv_to_yuv_soa(struct gallivm_state *gallivm,
     assert(lp_check_value(type, i));
  
     /*
+    * Little endian:
      * y = (yuyv >> 16*i) & 0xff
      * u = (yuyv >> 8   ) & 0xff
      * v = (yuyv >> 24  ) & 0xff
+    *
+    * Big endian:
+    * y = (yuyv >> (-16*i + 24)) & 0xff
+    * u = (yuyv >> 16)          & 0xff
+    * v = (yuyv)                & 0xff
      */
  
  #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
@@ -152,7 +174,7 @@ yuyv_to_yuv_soa(struct gallivm_state *gallivm,
      * per element. Didn't measure performance but cuts shader size
      * by quite a bit (less difference if cpu has no sse4.1 support).
      */
-   if (util_cpu_caps.has_sse2 && n == 4) {
+   if (util_cpu_caps.has_sse2 && n > 1) {
        LLVMValueRef sel, tmp;
        struct lp_build_context bld32;
  
@@ -165,12 +187,22 @@ yuyv_to_yuv_soa(struct gallivm_state *gallivm,
  #endif
     {
        LLVMValueRef shift;
+#ifdef PIPE_ARCH_LITTLE_ENDIAN
        shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), "");
+#else
+      shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, -16), "");
+      shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(gallivm, type, 24), "");
+#endif
        *y = LLVMBuildLShr(builder, packed, shift, "");
     }
  
+#ifdef PIPE_ARCH_LITTLE_ENDIAN
     *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), "");
     *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 24), "");
+#else
+   *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), "");
+   *v = packed;
+#endif
  
     mask = lp_build_const_int_vec(gallivm, type, 0xff);
  
@@ -180,7 +212,7 @@ yuyv_to_yuv_soa(struct gallivm_state *gallivm,
  }
  
  
-static INLINE void
+static inline void
  yuv_to_rgb_soa(struct gallivm_state *gallivm,
                 unsigned n,
                 LLVMValueRef y, LLVMValueRef u, LLVMValueRef v,
@@ -302,10 +334,17 @@ rgb_to_rgba_aos(struct gallivm_state *gallivm,
      * Make a 4 x unorm8 vector
      */
  
+#ifdef PIPE_ARCH_LITTLE_ENDIAN
     r = r;
     g = LLVMBuildShl(builder, g, lp_build_const_int_vec(gallivm, type, 8), "");
     b = LLVMBuildShl(builder, b, lp_build_const_int_vec(gallivm, type, 16), "");
     a = lp_build_const_int_vec(gallivm, type, 0xff000000);
+#else
+   r = LLVMBuildShl(builder, r, lp_build_const_int_vec(gallivm, type, 24), "");
+   g = LLVMBuildShl(builder, g, lp_build_const_int_vec(gallivm, type, 16), "");
+   b = LLVMBuildShl(builder, b, lp_build_const_int_vec(gallivm, type, 8), "");
+   a = lp_build_const_int_vec(gallivm, type, 0x000000ff);
+#endif
  
     rgba = r;
     rgba = LLVMBuildOr(builder, rgba, g, "");
@@ -458,7 +497,7 @@ lp_build_fetch_subsampled_rgba_aos(struct gallivm_state *gallivm,
     assert(format_desc->block.width == 2);
     assert(format_desc->block.height == 1);
  
-   packed = lp_build_gather(gallivm, n, 32, 32, base_ptr, offset);
+   packed = lp_build_gather(gallivm, n, 32, 32, TRUE, base_ptr, offset, FALSE);
  
     (void)j;